diff options
author | Kai Wasserbäch <[email protected]> | 2011-11-29 18:17:47 +0100 |
---|---|---|
committer | José Fonseca <[email protected]> | 2011-11-29 20:26:53 +0000 |
commit | ccd4d4367f2b4e5aebfc59b832599812a4a1c7d8 (patch) | |
tree | b5e510235d4be8dd4644a72c721693eaae02c1b5 /src/gallium/drivers/cell/ppu | |
parent | 09e67706e9a74600e16fe012ecfd192b0d31960a (diff) |
gallium/cell: Remove the driver.
Complicates Gallium3D development and doesn't seem to have active users.
Signed-off-by: Kai Wasserbäch <[email protected]>
Signed-off-by: José Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
42 files changed, 0 insertions, 11241 deletions
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile deleted file mode 100644 index c92f8e5cba2..00000000000 --- a/src/gallium/drivers/cell/ppu/Makefile +++ /dev/null @@ -1,86 +0,0 @@ -# Gallium3D Cell driver: PPU code - -# This makefile builds the libcell.a library which gets pulled into -# the main libGL.so library - - -TOP = ../../../../.. -include $(TOP)/configs/current - - -# This is the "top-level" cell PPU driver code, will get pulled into libGL.so -# by the winsys Makefile. -CELL_LIB = ../libcell.a - - -# This is the SPU code. We'd like to be able to put this into the libcell.a -# archive with the PPU code, but nesting .a libs doesn't seem to work. -# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile -SPU_CODE_MODULE = ../spu/g3d_spu.a - - -SOURCES = \ - cell_batch.c \ - cell_clear.c \ - cell_context.c \ - cell_draw_arrays.c \ - cell_fence.c \ - cell_flush.c \ - cell_gen_fragment.c \ - cell_gen_fp.c \ - cell_state_derived.c \ - cell_state_emit.c \ - cell_state_shader.c \ - cell_pipe_state.c \ - cell_screen.c \ - cell_state_vertex.c \ - cell_spu.c \ - cell_surface.c \ - cell_texture.c \ - cell_vbuf.c \ - cell_vertex_fetch.c \ - cell_vertex_shader.c - - -OBJECTS = $(SOURCES:.c=.o) \ - -INCLUDE_DIRS = \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -.c.s: - $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -default: $(CELL_LIB) - - -$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) -# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work - ar -ru $(CELL_LIB) $(OBJECTS) - -#$(PROG): $(PPU_OBJECTS) -# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) - - - -clean: - rm -f *.o *~ $(CELL_LIB) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - - - diff --git 
a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c deleted file mode 100644 index fe144f8b849..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ /dev/null @@ -1,260 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_spu.h" - - - -/** - * Search the buffer pool for an empty/free buffer and return its index. - * Buffers are used for storing vertex data, state and commands which - * will be sent to the SPUs. - * If no empty buffers are available, wait for one. 
- * \return buffer index in [0, CELL_NUM_BUFFERS-1] - */ -uint -cell_get_empty_buffer(struct cell_context *cell) -{ - static uint prev_buffer = 0; - uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; - uint tries = 0; - - /* Find a buffer that's marked as free by all SPUs */ - while (1) { - uint spu, num_free = 0; - - for (spu = 0; spu < cell->num_spus; spu++) { - if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { - num_free++; - - if (num_free == cell->num_spus) { - /* found a free buffer, now mark status as used */ - for (spu = 0; spu < cell->num_spus; spu++) { - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - } - /* - printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); - */ - prev_buffer = buf; - - /* release tex buffer associated w/ prev use of this batch buf */ - cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); - - return buf; - } - } - else { - break; - } - } - - /* try next buf */ - buf = (buf + 1) % CELL_NUM_BUFFERS; - - tries++; - if (tries == 100) { - /* - printf("PPU WAITING for buffer...\n"); - */ - } - } -} - - -/** - * Append a fence command to the current batch buffer. - * Note that we're sure there's always room for this because of the - * adjusted size check in cell_batch_free_space(). 
- */ -static void -emit_fence(struct cell_context *cell) -{ - const uint batch = cell->cur_batch; - const uint size = cell->buffer_size[batch]; - struct cell_command_fence *fence_cmd; - struct cell_fence *fence = &cell->fenced_buffers[batch].fence; - uint i; - - /* set fence status to emitted, not yet signalled */ - for (i = 0; i < cell->num_spus; i++) { - fence->status[i][0] = CELL_FENCE_EMITTED; - } - - STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); - ASSERT(size % 16 == 0); - ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); - - fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); - fence_cmd->opcode[0] = CELL_CMD_FENCE; - fence_cmd->fence = fence; - - /* update batch buffer size */ - cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); -} - - -/** - * Flush the current batch buffer to the SPUs. - * An empty buffer will be found and set as the new current batch buffer - * for subsequent commands/data. - */ -void -cell_batch_flush(struct cell_context *cell) -{ - static boolean flushing = FALSE; - uint batch = cell->cur_batch; - uint size = cell->buffer_size[batch]; - uint spu, cmd_word; - - assert(!flushing); - - if (size == 0) - return; - - /* Before we use this batch buffer, make sure any fenced texture buffers - * are released. - */ - if (cell->fenced_buffers[batch].head) { - emit_fence(cell); - size = cell->buffer_size[batch]; - } - - flushing = TRUE; - - assert(batch < CELL_NUM_BUFFERS); - - /* - printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->buffer[batch][0], size); - */ - - /* - * Build "BATCH" command and send to all SPUs. 
- */ - cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); - - for (spu = 0; spu < cell->num_spus; spu++) { - assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); - send_mbox_message(cell_global.spe_contexts[spu], cmd_word); - } - - /* When the SPUs are done copying the buffer into their locals stores - * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] - * array indicating that the PPU can re-use the buffer. - */ - - batch = cell_get_empty_buffer(cell); - - cell->buffer_size[batch] = 0; /* empty */ - cell->cur_batch = batch; - - flushing = FALSE; -} - - -/** - * Return the number of bytes free in the current batch buffer. - */ -uint -cell_batch_free_space(const struct cell_context *cell) -{ - uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; - free -= sizeof(struct cell_command_fence); - return free; -} - - -/** - * Allocate space in the current batch buffer for 'bytes' space. - * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. - * \return address in batch buffer to put data - */ -void * -cell_batch_alloc16(struct cell_context *cell, uint bytes) -{ - void *pos; - uint size; - - ASSERT(bytes % 16 == 0); - ASSERT(bytes <= CELL_BUFFER_SIZE); - ASSERT(cell->cur_batch >= 0); - -#ifdef ASSERT - { - uint spu; - for (spu = 0; spu < cell->num_spus; spu++) { - ASSERT(cell->buffer_status[spu][cell->cur_batch][0] - == CELL_BUFFER_STATUS_USED); - } - } -#endif - - size = cell->buffer_size[cell->cur_batch]; - - if (bytes > cell_batch_free_space(cell)) { - cell_batch_flush(cell); - size = 0; - } - - ASSERT(size % 16 == 0); - ASSERT(size + bytes <= CELL_BUFFER_SIZE); - - pos = (void *) (cell->buffer[cell->cur_batch] + size); - - cell->buffer_size[cell->cur_batch] = size + bytes; - - return pos; -} - - -/** - * One-time init of batch buffers. 
- */ -void -cell_init_batch_buffers(struct cell_context *cell) -{ - uint spu, buf; - - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. - */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h deleted file mode 100644 index 290136031a1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_BATCH_H -#define CELL_BATCH_H - -#include "pipe/p_compiler.h" - - -struct cell_context; - - -extern uint -cell_get_empty_buffer(struct cell_context *cell); - -extern void -cell_batch_flush(struct cell_context *cell); - -extern uint -cell_batch_free_space(const struct cell_context *cell); - -extern void * -cell_batch_alloc16(struct cell_context *cell, uint bytes); - -extern void -cell_init_batch_buffers(struct cell_context *cell); - - -#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c deleted file mode 100644 index 6a525ef4e41..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - -#include <stdio.h> -#include <assert.h> -#include <stdint.h> -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "cell/common.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_state.h" - - -/** - * Called via pipe->clear() - */ -void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const pipe_color_union *color, - double depth, unsigned stencil) -{ - struct cell_context *cell = cell_context(pipe); - - if (cell->dirty) - cell_update_derived(cell); - - if (buffers & PIPE_CLEAR_COLOR) { - uint surfIndex = 0; - union util_color uc; - - util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc); - - /* Build a CLEAR command and place it in the current batch buffer */ - STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = uc.ui; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - uint surfIndex = 1; - uint clearValue; - - clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format, - depth, stencil); - - /* Build a CLEAR command and place it in the current batch buffer */ - 
STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = clearValue; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h deleted file mode 100644 index a365feb0f00..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_CLEAR_H -#define CELL_CLEAR_H - - -struct pipe_context; - - -extern void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil); - - -#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c deleted file mode 100644 index 58e647a39fa..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - - -#include <stdio.h> - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -#include "cell/common.h" -#include "cell_batch.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_state.h" -#include "cell_surface.h" -#include "cell_spu.h" -#include "cell_pipe_state.h" -#include "cell_texture.h" -#include "cell_vbuf.h" - - - -static void -cell_destroy_context( struct pipe_context *pipe ) -{ - struct cell_context *cell = cell_context(pipe); - unsigned i; - - for (i = 0; i < cell->num_vertex_buffers; i++) { - pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); - } - - util_delete_keymap(cell->fragment_ops_cache, NULL); - - cell_spu_exit(cell); - - align_free(cell); -} - - -static struct draw_context * -cell_draw_create(struct cell_context *cell) -{ - struct draw_context *draw = draw_create(&cell->pipe); - -#if 0 /* broken */ - if (getenv("GALLIUM_CELL_VS")) { - /* plug in SPU-based vertex transformation code */ - draw->shader_queue_flush = cell_vertex_shader_queue_flush; - draw->driver_private = cell; - } -#endif - - return draw; -} - - -static const struct debug_named_value cell_debug_flags[] = { - {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */ - {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */ - {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */ - {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/ - {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/ - {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */ - 
{"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */ - DEBUG_NAMED_VALUE_END -}; - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ) -{ - struct cell_context *cell; - uint i; - - /* some fields need to be 16-byte aligned, so align the whole object */ - cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); - if (!cell) - return NULL; - - memset(cell, 0, sizeof(*cell)); - - cell->winsys = NULL; /* XXX: fixme - get this from screen? */ - cell->pipe.winsys = NULL; - cell->pipe.screen = screen; - cell->pipe.priv = priv; - cell->pipe.destroy = cell_destroy_context; - - cell->pipe.clear = cell_clear; - cell->pipe.flush = cell_flush; - -#if 0 - cell->pipe.begin_query = cell_begin_query; - cell->pipe.end_query = cell_end_query; - cell->pipe.wait_query = cell_wait_query; -#endif - - cell_init_draw_functions(cell); - cell_init_state_functions(cell); - cell_init_shader_functions(cell); - cell_init_surface_functions(cell); - cell_init_vertex_functions(cell); - cell_init_texture_transfer_funcs(cell); - - cell->draw = cell_draw_create(cell); - - /* Create cache of fragment ops generated code */ - cell->fragment_ops_cache = - util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); - - cell_init_vbuf(cell); - - draw_set_rasterize_stage(cell->draw, cell->vbuf); - - /* convert all points/lines to tris for the time being */ - draw_wide_point_threshold(cell->draw, 0.0); - draw_wide_line_threshold(cell->draw, 0.0); - - /* get env vars or read config file to get debug flags */ - cell->debug_flags = debug_get_flags_option("CELL_DEBUG", - cell_debug_flags, - 0 ); - - for (i = 0; i < CELL_NUM_BUFFERS; i++) - cell_fence_init(&cell->fenced_buffers[i].fence); - - - /* - * SPU stuff - */ - /* This call only works with SDK 3.0. Anyone still using 2.1??? 
*/ - cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); - cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); - if (cell->debug_flags) { - printf("Cell: found %d Cell(s) with %u SPUs\n", - cell->num_cells, cell->num_spus); - } - if (getenv("CELL_NUM_SPUS")) { - cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); - assert(cell->num_spus > 0); - } - - cell_start_spus(cell); - - cell_init_batch_buffers(cell); - - /* make sure SPU initializations are done before proceeding */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - return &cell->pipe; -} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h deleted file mode 100644 index d1aee62ba1e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_CONTEXT_H -#define CELL_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "draw/draw_vertex.h" -#include "draw/draw_vbuf.h" -/*#include "cell_winsys.h"*/ -#include "cell/common.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "tgsi/tgsi_scan.h" -#include "util/u_keymap.h" - - -struct cell_vbuf_render; - - -/** - * Cell vertex shader state, subclass of pipe_shader_state. - */ -struct cell_vertex_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - void *draw_data; -}; - - -/** - * Cell fragment shader state, subclass of pipe_shader_state. - */ -struct cell_fragment_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - struct spe_function code; - void *data; -}; - - -/** - * Key for mapping per-fragment state to cached SPU machine code. - * keymap(cell_fragment_ops_key) => cell_command_fragment_ops - */ -struct cell_fragment_ops_key -{ - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_depth_stencil_alpha_state dsa; - enum pipe_format color_format; - enum pipe_format zs_format; -}; - - -struct cell_buffer_node; - -/** - * Fenced buffer list. List of buffers which can be unreferenced after - * the fence has been executed/signalled. - */ -struct cell_buffer_list -{ - PIPE_ALIGN_VAR(16) struct cell_fence fence; - struct cell_buffer_node *head; -}; - -struct cell_velems_state -{ - unsigned count; - struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; -}; - -/** - * Per-context state, subclass of pipe_context. 
- */ -struct cell_context -{ - struct pipe_context pipe; - - struct cell_winsys *winsys; - - const struct pipe_blend_state *blend; - const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; - const struct pipe_rasterizer_state *rasterizer; - const struct cell_vertex_shader_state *vs; - const struct cell_fragment_shader_state *fs; - const struct cell_velems_state *velems; - - struct spe_function logic_op; - - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_clip_state clip; - struct pipe_resource *constants[2]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct cell_resource *texture[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; - uint num_textures; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - uint num_vertex_buffers; - struct pipe_index_buffer index_buffer; - - ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; - ubyte *zsbuf_map; - - uint dirty; - uint dirty_textures; /* bitmask of texture units */ - uint dirty_samplers; /* bitmask of sampler units */ - - /** Cache of code generated for per-fragment ops */ - struct keymap *fragment_ops_cache; - - /** The primitive drawing context */ - struct draw_context *draw; - struct draw_stage *render_stage; - - /** For post-transformed vertex buffering: */ - struct cell_vbuf_render *vbuf_render; - struct draw_stage *vbuf; - - struct vertex_info vertex_info; - - /** Mapped constant buffers */ - const void *mapped_constants[PIPE_SHADER_TYPES]; - - PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; - - uint num_cells, num_spus; - - /** Buffers for command batches, vertex/index data */ - uint buffer_size[CELL_NUM_BUFFERS]; - PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; - - int cur_batch; /**< which 
buffer is being filled w/ commands */ - - /** [4] to ensure 16-byte alignment for each status word */ - PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; - - - /** Associated with each command/batch buffer is a list of pipe_buffers - * that are fenced. When the last command in a buffer is executed, the - * fence will be signalled, indicating that any pipe_buffers preceeding - * that fence can be unreferenced (and probably freed). - */ - struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; - - - struct spe_function attrib_fetch; - unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; - - unsigned debug_flags; -}; - - - - -static INLINE struct cell_context * -cell_context(struct pipe_context *pipe) -{ - return (struct cell_context *) pipe; -} - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ); - -extern void -cell_vertex_shader_queue_flush(struct draw_context *draw); - - -/* XXX find a better home for this */ -extern void cell_update_vertex_fetch(struct draw_context *draw); - - -#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c deleted file mode 100644 index a367fa3fe15..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Brian Paul - * Keith Whitwell - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_inlines.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_state.h" -#include "cell_flush.h" -#include "cell_texture.h" - -#include "draw/draw_context.h" - - - - - - -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - * - * XXX should the element buffer be specified/bound with a separate function? 
- */ -static void -cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) -{ - struct cell_context *cell = cell_context(pipe); - struct draw_context *draw = cell->draw; - void *mapped_indices = NULL; - unsigned i; - - if (cell->dirty) - cell_update_derived( cell ); - -#if 0 - cell_map_surfaces(cell); -#endif - - /* - * Map vertex buffers - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - /* Map index buffer, if present */ - if (info->indexed && cell->index_buffer.buffer) - mapped_indices = cell_resource(cell->index_buffer.buffer)->data; - - draw_set_mapped_index_buffer(draw, mapped_indices); - - /* draw! */ - draw_vbo(draw, info); - - /* - * unmap vertex/index buffers - will cause draw module to flush - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) { - draw_set_mapped_index_buffer(draw, NULL); - } - - /* - * TODO: Flush only when a user vertex/index buffer is present - * (or even better, modify draw module to do this - * internally when this condition is seen?) - */ - draw_flush(draw); -} - - -void -cell_init_draw_functions(struct cell_context *cell) -{ - cell->pipe.draw_vbo = cell_draw_vbo; -} - diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h deleted file mode 100644 index 148873aa675..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_DRAW_ARRAYS_H -#define CELL_DRAW_ARRAYS_H - - -extern void -cell_init_draw_functions(struct cell_context *cell); - - -#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c deleted file mode 100644 index 181fef44f45..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ /dev/null @@ -1,172 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include <unistd.h> -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_texture.h" - - -void -cell_fence_init(struct cell_fence *fence) -{ - uint i; - ASSERT_ALIGN16(fence->status); - for (i = 0; i < CELL_MAX_SPUS; i++) { - fence->status[i][0] = CELL_FENCE_IDLE; - } -} - - -boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence) -{ - uint i; - for (i = 0; i < cell->num_spus; i++) { - if (fence->status[i][0] != CELL_FENCE_SIGNALLED) - return FALSE; - /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ - } - return TRUE; -} - - -boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - uint64_t timeout) -{ - while (!cell_fence_signalled(cell, fence)) { - usleep(10); - } - -#ifdef DEBUG - { - uint i; - for (i = 0; i < cell->num_spus; i++) { - assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); - } - } -#endif - return TRUE; -} - - - - -struct cell_buffer_node -{ - struct pipe_resource *buffer; - struct cell_buffer_node *next; -}; - - -#if 0 -static void -cell_add_buffer_to_list(struct cell_context *cell, - struct cell_buffer_list *list, - struct pipe_resource *buffer) -{ - struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); - /* create new list node which references the buffer, insert at head */ - if (node) { - pipe_resource_reference(&node->buffer, buffer); - node->next = list->head; - list->head = node; - } -} -#endif - - -/** - * Wait for completion of the given fence, then unreference any buffers - * on the list. - * This typically unrefs/frees texture buffers after any rendering which uses - * them has completed. 
- */ -void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list) -{ - if (list->head) { - /*struct pipe_screen *ps = cell->pipe.screen;*/ - struct cell_buffer_node *node; - - cell_fence_finish(cell, &list->fence); - - /* traverse the list, unreferencing buffers, freeing nodes */ - node = list->head; - while (node) { - struct cell_buffer_node *next = node->next; - assert(node->buffer); - /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/ -#if 0 - printf("Unref buffer %p\n", node->buffer); - if (node->buffer->reference.count == 1) - printf(" Delete!\n"); -#endif - pipe_resource_reference(&node->buffer, NULL); - FREE(node); - node = next; - } - list->head = NULL; - } -} - - -/** - * This should be called for each render command. - * Any texture buffers that are current bound will be added to a fenced - * list to be freed later when the fence is executed/signalled. - */ -void -cell_add_fenced_textures(struct cell_context *cell) -{ - /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ - uint i; - - for (i = 0; i < cell->num_textures; i++) { - struct cell_resource *ct = cell->texture[i]; - if (ct) { -#if 0 - printf("Adding texture %p buffer %p to list\n", - ct, ct->tiled_buffer[level]); -#endif -#if 00 - /* XXX this needs to be fixed/restored! - * Maybe keep pointers to textures, not buffers. - */ - if (ct->base.buffer) - cell_add_buffer_to_list(cell, list, ct->buffer); -#endif - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h deleted file mode 100644 index 3568230b1c0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ /dev/null @@ -1,60 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FENCE_H -#define CELL_FENCE_H - - -extern void -cell_fence_init(struct cell_fence *fence); - - -extern boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags); - - -extern boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags, - uint64_t timeout); - - - -extern void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list); - - -extern void -cell_add_fenced_textures(struct cell_context *cell); - - -#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c deleted file mode 100644 index 463f4d03eb9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_render.h" -#include "draw/draw_context.h" - - -/** - * Called via pipe->flush() - */ -void -cell_flush(struct pipe_context *pipe, - struct pipe_fence_handle **fence) -{ - struct cell_context *cell = cell_context(pipe); - - if (fence) { - *fence = NULL; - } - - flags |= CELL_FLUSH_WAIT; - - draw_flush( cell->draw ); - cell_flush_int(cell, flags); -} - - -/** - * Cell internal flush function. Send the current batch buffer to all SPUs. - * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. - * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero - */ -void -cell_flush_int(struct cell_context *cell, unsigned flags) -{ - static boolean flushing = FALSE; /* recursion catcher */ - uint i; - - ASSERT(!flushing); - flushing = TRUE; - - if (flags & CELL_FLUSH_WAIT) { - STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); - opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); - *cmd[0] = CELL_CMD_FINISH; - } - - cell_batch_flush(cell); - -#if 0 - /* Send CMD_FINISH to all SPUs */ - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); - } -#endif - - if (flags & CELL_FLUSH_WAIT) { - /* Wait for ack */ - for (i = 0; i < cell->num_spus; i++) { - uint k = wait_mbox_message(cell_global.spe_contexts[i]); - assert(k == CELL_CMD_FINISH); - } - } - - flushing = FALSE; -} - - -void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size) -{ - STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 
16 == 0); - uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, - sizeof(opcode_t) + sizeof(struct cell_buffer_range)); - struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; - batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; - br->base = (uintptr_t) ptr; - br->size = size; -} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h deleted file mode 100644 index 509ae6239ac..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ /dev/null @@ -1,45 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FLUSH -#define CELL_FLUSH - -#define CELL_FLUSH_WAIT 0x80000000 - -extern void -cell_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); - -extern void -cell_flush_int(struct cell_context *cell, unsigned flags); - -extern void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c deleted file mode 100644 index 1d8a11a4ac9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ /dev/null @@ -1,2036 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - - -/** - * Generate SPU fragment program/shader code. - * - * Note that we generate SOA-style code here. So each TGSI instruction - * operates on four pixels (and is translated into four SPU instructions, - * generally speaking). - * - * \author Brian Paul - */ - -#include <math.h> -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "util/u_memory.h" -#include "cell_context.h" -#include "cell_gen_fp.h" - - -#define MAX_TEMPS 16 -#define MAX_IMMED 8 - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -/** - * Context needed during code generation. 
- */ -struct codegen -{ - struct cell_context *cell; - int inputs_reg; /**< 1st function parameter */ - int outputs_reg; /**< 2nd function parameter */ - int constants_reg; /**< 3rd function parameter */ - int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ - int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ - - int num_imm; /**< number of immediates */ - - int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ - - int addr_reg; /**< address register, integer values */ - - /** Per-instruction temps / intermediate temps */ - int num_itemps; - int itemps[12]; - - /** Current IF/ELSE/ENDIF nesting level */ - int if_nesting; - /** Current BGNLOOP/ENDLOOP nesting level */ - int loop_nesting; - /** Location of start of current loop */ - int loop_start; - - /** Index of if/conditional mask register */ - int cond_mask_reg; - /** Index of loop mask register */ - int loop_mask_reg; - - /** Index of master execution mask register */ - int exec_mask_reg; - - /** KIL mask: indicates which fragments have been killed */ - int kill_mask_reg; - - int frame_size; /**< Stack frame size, in words */ - - struct spe_function *f; - boolean error; -}; - - -/** - * Allocate an intermediate temporary register. - */ -static int -get_itemp(struct codegen *gen) -{ - int t = spe_allocate_available_register(gen->f); - assert(gen->num_itemps < Elements(gen->itemps)); - gen->itemps[gen->num_itemps++] = t; - return t; -} - -/** - * Free all intermediate temporary registers. To be called after each - * instruction has been emitted. - */ -static void -free_itemps(struct codegen *gen) -{ - int i; - for (i = 0; i < gen->num_itemps; i++) { - spe_release_register(gen->f, gen->itemps[i]); - } - gen->num_itemps = 0; -} - - -/** - * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. - * The register is allocated and initialized upon the first call. 
- */ -static int -get_const_one_reg(struct codegen *gen) -{ - if (gen->one_reg <= 0) { - gen->one_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init constant reg = 1.0:"); - - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); - - spe_indent(gen->f, -4); - } - - return gen->one_reg; -} - - -/** - * Return index of the address register. - * Used for indirect register loads/stores. - */ -static int -get_address_reg(struct codegen *gen) -{ - if (gen->addr_reg <= 0) { - gen->addr_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init address reg = 0:"); - - /* init addr = {0, 0, 0, 0} */ - spe_zero(gen->f, gen->addr_reg); - - spe_indent(gen->f, -4); - } - - return gen->addr_reg; -} - - -/** - * Return index of the master execution mask. - * The register is allocated an initialized upon the first call. - * - * The master execution mask controls which pixels in a quad are - * modified, according to surrounding conditionals, loops, etc. 
- */ -static int -get_exec_mask_reg(struct codegen *gen) -{ - if (gen->exec_mask_reg <= 0) { - gen->exec_mask_reg = spe_allocate_available_register(gen->f); - - /* XXX this may not be needed */ - spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); - spe_load_int(gen->f, gen->exec_mask_reg, ~0); - } - - return gen->exec_mask_reg; -} - - -/** Return index of the conditional (if/else) execution mask register */ -static int -get_cond_mask_reg(struct codegen *gen) -{ - if (gen->cond_mask_reg <= 0) { - gen->cond_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->cond_mask_reg; -} - - -/** Return index of the loop execution mask register */ -static int -get_loop_mask_reg(struct codegen *gen) -{ - if (gen->loop_mask_reg <= 0) { - gen->loop_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->loop_mask_reg; -} - - - -static boolean -is_register_src(struct codegen *gen, int channel, - const struct tgsi_full_src_register *src) -{ - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - - if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { - return FALSE; - } - if (src->Register.File == TGSI_FILE_TEMPORARY || - src->Register.File == TGSI_FILE_IMMEDIATE) { - return TRUE; - } - return FALSE; -} - - -static boolean -is_memory_dst(struct codegen *gen, int channel, - const struct tgsi_full_dst_register *dst) -{ - if (dst->Register.File == TGSI_FILE_OUTPUT) { - return TRUE; - } - else { - return FALSE; - } -} - - -/** - * Return the index of the SPU temporary containing the named TGSI - * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we - * just return the corresponding SPE register. If the TGIS register - * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register - * and emit an SPE load instruction. 
- */ -static int -get_src_reg(struct codegen *gen, - int channel, - const struct tgsi_full_src_register *src) -{ - int reg = -1; - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - boolean reg_is_itemp = FALSE; - uint sign_op; - - assert(swizzle >= TGSI_SWIZZLE_X); - assert(swizzle <= TGSI_SWIZZLE_W); - - { - int index = src->Register.Index; - - assert(swizzle < 4); - - if (src->Register.Indirect) { - /* XXX unfinished */ - } - - switch (src->Register.File) { - case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[index][swizzle]; - break; - case TGSI_FILE_INPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); - } - break; - case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[index][swizzle]; - break; - case TGSI_FILE_CONSTANT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); - } - break; - default: - assert(0); - } - } - - /* - * Handle absolute value, negate or set-negative of src register. - */ - sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - if (sign_op != TGSI_UTIL_SIGN_KEEP) { - /* - * All sign ops are done by manipulating bit 31, the IEEE float sign bit. 
- */ - const int bit31mask_reg = get_itemp(gen); - int result_reg; - - if (reg_is_itemp) { - /* re-use 'reg' for the result */ - result_reg = reg; - } - else { - /* alloc a new reg for the result */ - result_reg = get_itemp(gen); - } - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - if (sign_op == TGSI_UTIL_SIGN_CLEAR) { - spe_andc(gen->f, result_reg, reg, bit31mask_reg); - } - else if (sign_op == TGSI_UTIL_SIGN_SET) { - spe_and(gen->f, result_reg, reg, bit31mask_reg); - } - else { - assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); - spe_xor(gen->f, result_reg, reg, bit31mask_reg); - } - - reg = result_reg; - } - - return reg; -} - - -/** - * Return the index of an SPE register to use for the given TGSI register. - * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the - * corresponding SPE register is returned. If the TGSI register is - * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. - * See store_dest_reg() below... - */ -static int -get_dst_reg(struct codegen *gen, - int channel, - const struct tgsi_full_dst_register *dest) -{ - int reg = -1; - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) - reg = get_itemp(gen); - else - reg = gen->temp_regs[dest->Register.Index][channel]; - break; - case TGSI_FILE_OUTPUT: - reg = get_itemp(gen); - break; - default: - assert(0); - } - - return reg; -} - - -/** - * When a TGSI instruction is writing to an output register, this - * function emits the SPE store instruction to store the value_reg. - * \param value_reg the SPE register containing the value to store. - * This would have been returned by get_dst_reg(). 
- */ -static void -store_dest_reg(struct codegen *gen, - int value_reg, int channel, - const struct tgsi_full_dst_register *dest) -{ - /* - * XXX need to implement dst reg clamping/saturation - */ -#if 0 - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - break; - case TGSI_SAT_MINUS_PLUS_ONE: - break; - default: - assert( 0 ); - } -#endif - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->Register.Index][channel]; - int exec_reg = get_exec_mask_reg(gen); - /* Mix d with new value according to exec mask: - * d[i] = mask_reg[i] ? value_reg : d_reg - */ - spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); - } - else { - /* we're not inside a condition or loop: do nothing special */ - - } - break; - case TGSI_FILE_OUTPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = dest->Register.Index * 4 + channel; - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int exec_reg = get_exec_mask_reg(gen); - int curval_reg = get_itemp(gen); - /* First read the current value from memory: - * Load: curval = memory[(machine_reg) + offset] - */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - /* Mix curval with newvalue according to exec mask: - * d[i] = mask_reg[i] ? 
value_reg : d_reg - */ - spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); - /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - } - else { - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); - } - } - break; - default: - assert(0); - } -} - - - -static void -emit_prologue(struct codegen *gen) -{ - gen->frame_size = 1024; /* XXX temporary, should be dynamic */ - - spe_comment(gen->f, 0, "Function prologue:"); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - if (gen->frame_size >= 512) { - /* offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - int sp_reg = spe_allocate_available_register(gen->f); - /* offset = -framesize */ - spe_load_int(gen->f, offset_reg, -gen->frame_size); - /* sp = $sp */ - spe_move(gen->f, sp_reg, SPE_REG_SP); - /* $sp = $sp + offset_reg */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* save $sp in stack frame */ - spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); - /* clean up */ - spe_release_register(gen->f, offset_reg); - spe_release_register(gen->f, sp_reg); - } - else { - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - } -} - - -static void -emit_epilogue(struct codegen *gen) -{ - const int return_reg = 3; - - spe_comment(gen->f, 0, "Function epilogue:"); - - spe_comment(gen->f, 0, "return the killed mask"); - if (gen->kill_mask_reg > 0) { - /* shader called KIL, return the "alive" mask */ - spe_move(gen->f, return_reg, gen->kill_mask_reg); - } - else { - /* return {0,0,0,0} */ - spe_load_uint(gen->f, return_reg, 0); - } - - spe_comment(gen->f, 0, "restore stack and return"); - if (gen->frame_size >= 512) { - /* 
offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - /* offset = framesize */ - spe_load_int(gen->f, offset_reg, gen->frame_size); - /* $sp = $sp + offset */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* clean up */ - spe_release_register(gen->f, offset_reg); - } - else { - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); - } - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); -} - - -#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ - for (ch = 0; ch < 4; ch++) \ - if (inst->Dst[0].Register.WriteMask & (1 << ch)) - - -static boolean -emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch = 0, src_reg, addr_reg; - - src_reg = get_src_reg(gen, ch, &inst->Src[0]); - addr_reg = get_address_reg(gen); - - /* convert float to int */ - spe_cflts(gen->f, addr_reg, src_reg, 0); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, src_reg[4], dst_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->Src[0]) && - is_memory_dst(gen, ch, &inst->Dst[0])) { - /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); - } - else { - spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); - } - } - - free_itemps(gen); - - return TRUE; -} - -/** - * Emit binary operation - */ -static boolean -emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4]; - - /* Loop over Red/Green/Blue/Alpha channels, fetch src operands 
*/ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* Emit actual SPE instruction: d = s1 + s2 */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SUB: - spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_MUL: - spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - default: - ; - } - } - - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - /* Free any intermediate temps we allocated */ - free_itemps(gen); - - return TRUE; -} - - -/** - * Emit multiply add. See emit_ADD for comments. - */ -static boolean -emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - -/** - * Emit linear interpolate. See emit_ADD for comments. 
- */ -static boolean -emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; - - /* setup/get src/dst/temp regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = s3 + s1(s2 - s3) */ - /* do all subtracts, then all fma, then all stores to better pipeline */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - - -/** - * Emit reciprocal or recip sqrt. - */ -static boolean -emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { - /* tmp = 1/s1 */ - spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - else { - /* tmp = 1/sqrt(s1) */ - spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = float_interp(s1, tmp) */ - spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit absolute value. See emit_ADD for comments. 
- */ -static boolean -emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - const int bit31mask_reg = get_itemp(gen); - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* d = sign bit cleared in s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3 component dot product. See emit_ADD for comments. - */ -static boolean -emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s1x_reg, s1y_reg, s1z_reg; - int s2x_reg, s2y_reg, s2z_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 4 component dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s0x_reg, s0y_reg, s0z_reg, s0w_reg; - int s1x_reg, s1y_reg, s1z_reg, s1w_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); - - /* t1 = w0 * w1 + t1 */ - spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit homogeneous dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX rewrite this function to look more like DP3/DP4 */ - int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = x0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = y0 * y1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = z0 * z1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - /* t = w1 + t */ - spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3-component vector normalize. 
- */ -static boolean -emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int src_reg[3]; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - - /* t0 = x * x */ - spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); - - /* t1 = y * y */ - spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); - - /* t0 = z * z + t0 */ - spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - /* t1 = 1.0 / sqrt(t0) */ - spe_frsqest(gen->f, t1_reg, t0_reg); - spe_fi(gen->f, t1_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - /* dst = src[ch] * t1 */ - spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit cross product. See emit_ADD for comments. 
- */ -static boolean -emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = z0 * y1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = y0 * z1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = x0 * z1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = z0 * x1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = y0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = x0 * y1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit inequality instruction. - * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as - * the result but OpenGL/TGSI needs 0.0 and 1.0 results. - * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. 
- */ -static boolean -emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; - boolean complement = FALSE; - - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SGT: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SLT: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - break; - case TGSI_OPCODE_SGE: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SLE: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SEQ: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SNE: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - default: - assert(0); - } - } - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = d & one_reg */ - if (complement) - spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); - else - spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit compare. - */ -static boolean -emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); - int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); - int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - int zero_reg = get_itemp(gen); - - spe_zero(gen->f, zero_reg); - - /* d = (s1 < 0) ? 
s2 : s3 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - -/** - * Emit trunc. - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit floor. - * If negative int subtract one - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ 
- FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Compute frac = Input - FLR(Input) - */ -static boolean -emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* d = s1 - FLR(s1) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* store result */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -#if 0 -static void -print_functions(struct cell_context *cell) -{ - struct cell_spu_function_info *funcs = &cell->spu_functions; - uint i; - for (i = 0; i < funcs->num; i++) { - printf("SPU func %u: %s at %u\n", - i, funcs->names[i], funcs->addrs[i]); - } -} -#endif - - -static uint -lookup_function(struct cell_context *cell, const char *funcname) -{ - const struct cell_spu_function_info *funcs = 
&cell->spu_functions; - uint i, addr = 0; - for (i = 0; i < funcs->num; i++) { - if (strcmp(funcs->names[i], funcname) == 0) { - addr = funcs->addrs[i]; - } - } - assert(addr && "spu function not found"); - return addr / 4; /* discard 2 least significant bits */ -} - - -/** - * Emit code to call a SPU function. - * Used to implement instructions like SIN/COS/POW/TEX/etc. - * If scalar, only the X components of the src regs are used, and the - * result is replicated across the dest register's XYZW components. - */ -static boolean -emit_function_call(struct codegen *gen, - const struct tgsi_full_instruction *inst, - char *funcname, uint num_args, boolean scalar) -{ - const uint addr = lookup_function(gen->cell, funcname); - char comment[100]; - int s_regs[3]; - int func_called = FALSE; - uint a, ch; - int retval_reg = -1; - - assert(num_args <= 3); - - snprintf(comment, sizeof(comment), "CALL %s:", funcname); - spe_comment(gen->f, -4, comment); - - if (scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); - } - /* we'll call the function, put the return value in this register, - * then replicate it across all write-enabled components in d_reg. 
- */ - retval_reg = spe_allocate_available_register(gen->f); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg; - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - if (!scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); - } - } - - d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - if (!scalar || !func_called) { - /* for a scalar function, we'll really only call the function once */ - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments */ - for (a = 0; a < num_args; a++) { - spe_move(gen->f, 3 + a, s_regs[a]); - } - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return value */ - if (scalar) - spe_move(gen->f, retval_reg, 3); - else - spe_move(gen->f, d_reg, 3); - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_reg && reg != retval_reg) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - - func_called = TRUE; - } - - if (scalar) { - spe_move(gen->f, d_reg, retval_reg); - } - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - if (scalar) { - spe_release_register(gen->f, retval_reg); - } - - return TRUE; -} - - -static boolean -emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const uint target = inst->Texture.Texture; - const uint unit = inst->Src[1].Register.Index; - uint addr; - int ch; - int coord_regs[4], d_regs[4]; - - switch (target) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_2D: - addr = lookup_function(gen->cell, "spu_tex_2d"); - break; - case TGSI_TEXTURE_3D: - addr = lookup_function(gen->cell, "spu_tex_3d"); - break; - case TGSI_TEXTURE_CUBE: - addr = 
lookup_function(gen->cell, "spu_tex_cube"); - break; - default: - ASSERT(0 && "unsupported texture target"); - return FALSE; - } - - assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); - - spe_comment(gen->f, -4, "CALL tex:"); - - /* get src/dst reg info */ - for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - { - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments (XXX depends on target) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, 3 + i, coord_regs[i]); - } - spe_load_uint(gen->f, 7, unit); /* sampler unit */ - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return values (four pixel's colors) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, d_regs[i], 3 + i); - } - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_regs[0] && - reg != d_regs[1] && - reg != d_regs[2] && - reg != d_regs[3]) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - - -/** - * KILL if any of src reg values are less than zero. 
- */ -static boolean -emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; - - spe_comment(gen->f, -4, "CALL kil:"); - - /* zero = {0,0,0,0} */ - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - - cmp_reg = get_itemp(gen); - - /* get src regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - } - - /* test if any src regs are < 0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (kil_reg >= 0) { - /* cmp = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); - /* kil = kil | cmp */ - spe_or(gen->f, kil_reg, kil_reg, cmp_reg); - } - else { - kil_reg = get_itemp(gen); - /* kil = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); - } - } - - if (gen->if_nesting || gen->loop_nesting) { - /* may have been a conditional kil */ - spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); - } - - /* allocate the kill mask reg if needed */ - if (gen->kill_mask_reg <= 0) { - gen->kill_mask_reg = spe_allocate_available_register(gen->f); - spe_move(gen->f, gen->kill_mask_reg, kil_reg); - } - else { - spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); - } - - free_itemps(gen); - - return TRUE; -} - - - -/** - * Emit min or max. - */ -static boolean -emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = (s0 > s1) ? 
s0 : s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) - spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); - else - spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit code to update the execution mask. - * This needs to be done whenever the execution status of a conditional - * or loop is changed. - */ -static void -emit_update_exec_mask(struct codegen *gen) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int cond_reg = gen->cond_mask_reg; - const int loop_reg = gen->loop_mask_reg; - - spe_comment(gen->f, 0, "Update master execution mask"); - - if (gen->if_nesting > 0 && gen->loop_nesting > 0) { - /* exec_mask = cond_mask & loop_mask */ - assert(cond_reg > 0); - assert(loop_reg > 0); - spe_and(gen->f, exec_reg, cond_reg, loop_reg); - } - else if (gen->if_nesting > 0) { - assert(cond_reg > 0); - spe_move(gen->f, exec_reg, cond_reg); - } - else if (gen->loop_nesting > 0) { - assert(loop_reg > 0); - spe_move(gen->f, exec_reg, loop_reg); - } - else { - spe_load_int(gen->f, exec_reg, ~0x0); - } -} - - -static boolean -emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int channel = 0; - int cond_reg; - - cond_reg = get_cond_mask_reg(gen); - - /* XXX push cond exec mask */ - - spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); - spe_load_int(gen->f, cond_reg, ~0); - - /* update conditional execution mask with the predicate register */ - int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); - - /* tmp = (s1_reg == 0) */ - spe_ceqi(gen->f, tmp_reg, s1_reg, 0); - /* tmp = !tmp */ - spe_complement(gen->f, tmp_reg, tmp_reg); - /* cond_mask = cond_mask & tmp */ - 
spe_and(gen->f, cond_reg, cond_reg, tmp_reg); - - gen->if_nesting++; - - /* update the master execution mask */ - emit_update_exec_mask(gen); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int cond_reg = get_cond_mask_reg(gen); - - spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); - spe_complement(gen->f, cond_reg, cond_reg); - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX todo: pop cond exec mask */ - - gen->if_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int exec_reg, loop_reg; - - exec_reg = get_exec_mask_reg(gen); - loop_reg = get_loop_mask_reg(gen); - - /* XXX push loop_exec mask */ - - spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); - spe_load_int(gen->f, loop_reg, ~0x0); - - gen->loop_nesting++; - gen->loop_start = spe_code_size(gen->f); /* in bytes */ - - return TRUE; -} - - -static boolean -emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int loop_reg = get_loop_mask_reg(gen); - const int tmp_reg = get_itemp(gen); - int offset; - - /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ - spe_orx(gen->f, tmp_reg, loop_reg); - - offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ - - /* branch back to top of loop if tmp_reg != 0 */ - spe_brnz(gen->f, tmp_reg, offset / 4); - - /* XXX pop loop_exec mask */ - - gen->loop_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int loop_reg = get_loop_mask_reg(gen); - - assert(gen->loop_nesting > 0); - - spe_comment(gen->f, 0, "loop exec mask &= ~master exec 
mask"); - spe_andc(gen->f, loop_reg, loop_reg, exec_reg); - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - assert(gen->loop_nesting > 0); - - return TRUE; -} - - -static boolean -emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, - boolean ddx) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->Src[0]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - int t1_reg = get_itemp(gen); - int t2_reg = get_itemp(gen); - - spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ - if (ddx) { - spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ - } - else { - spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ - } - spe_fs(gen->f, d_reg, t2_reg, t1_reg); - - free_itemps(gen); - } - - return TRUE; -} - - - - -/** - * Emit END instruction. - * We just return from the shader function at this point. - * - * Note that there may be more code after this that would be - * called by TGSI_OPCODE_CALL. - */ -static boolean -emit_END(struct codegen *gen) -{ - emit_epilogue(gen); - return TRUE; -} - - -/** - * Emit code for the given instruction. Just a big switch stmt. 
- */ -static boolean -emit_instruction(struct codegen *gen, - const struct tgsi_full_instruction *inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - return emit_ARL(gen, inst); - case TGSI_OPCODE_MOV: - return emit_MOV(gen, inst); - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_MUL: - return emit_binop(gen, inst); - case TGSI_OPCODE_MAD: - return emit_MAD(gen, inst); - case TGSI_OPCODE_LRP: - return emit_LRP(gen, inst); - case TGSI_OPCODE_DP3: - return emit_DP3(gen, inst); - case TGSI_OPCODE_DP4: - return emit_DP4(gen, inst); - case TGSI_OPCODE_DPH: - return emit_DPH(gen, inst); - case TGSI_OPCODE_NRM: - return emit_NRM3(gen, inst); - case TGSI_OPCODE_XPD: - return emit_XPD(gen, inst); - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - return emit_RCP_RSQ(gen, inst); - case TGSI_OPCODE_ABS: - return emit_ABS(gen, inst); - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - return emit_inequality(gen, inst); - case TGSI_OPCODE_CMP: - return emit_CMP(gen, inst); - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MAX: - return emit_MIN_MAX(gen, inst); - case TGSI_OPCODE_TRUNC: - return emit_TRUNC(gen, inst); - case TGSI_OPCODE_FLR: - return emit_FLR(gen, inst); - case TGSI_OPCODE_FRC: - return emit_FRC(gen, inst); - case TGSI_OPCODE_END: - return emit_END(gen); - - case TGSI_OPCODE_COS: - return emit_function_call(gen, inst, "spu_cos", 1, TRUE); - case TGSI_OPCODE_SIN: - return emit_function_call(gen, inst, "spu_sin", 1, TRUE); - case TGSI_OPCODE_POW: - return emit_function_call(gen, inst, "spu_pow", 2, TRUE); - case TGSI_OPCODE_EX2: - return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); - case TGSI_OPCODE_LG2: - return emit_function_call(gen, inst, "spu_log2", 1, TRUE); - case TGSI_OPCODE_TEX: - /* fall-through for now */ - case TGSI_OPCODE_TXD: - /* fall-through for now */ - case TGSI_OPCODE_TXB: - /* fall-through for now */ - case 
TGSI_OPCODE_TXL: - /* fall-through for now */ - case TGSI_OPCODE_TXP: - return emit_TEX(gen, inst); - case TGSI_OPCODE_KIL: - return emit_KIL(gen, inst); - - case TGSI_OPCODE_IF: - return emit_IF(gen, inst); - case TGSI_OPCODE_ELSE: - return emit_ELSE(gen, inst); - case TGSI_OPCODE_ENDIF: - return emit_ENDIF(gen, inst); - - case TGSI_OPCODE_BGNLOOP: - return emit_BGNLOOP(gen, inst); - case TGSI_OPCODE_ENDLOOP: - return emit_ENDLOOP(gen, inst); - case TGSI_OPCODE_BRK: - return emit_BRK(gen, inst); - case TGSI_OPCODE_CONT: - return emit_CONT(gen, inst); - - case TGSI_OPCODE_DDX: - return emit_DDX_DDY(gen, inst, TRUE); - case TGSI_OPCODE_DDY: - return emit_DDX_DDY(gen, inst, FALSE); - - /* XXX lots more cases to do... */ - - default: - fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", - inst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - - - -/** - * Emit code for a TGSI immediate value (vector of four floats). - * This involves register allocation and initialization. - * XXX the initialization should be done by a "prepare" stage, not - * per quad execution! - */ -static boolean -emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) -{ - int ch; - - assert(gen->num_imm < MAX_TEMPS); - - for (ch = 0; ch < 4; ch++) { - float val = immed->u[ch].Float; - - if (ch > 0 && val == immed->u[ch - 1].Float) { - /* re-use previous register */ - gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; - } - else { - char str[100]; - int reg = spe_allocate_available_register(gen->f); - - if (reg < 0) - return FALSE; - - sprintf(str, "init $%d = %f", reg, val); - spe_comment(gen->f, 0, str); - - /* update immediate map */ - gen->imm_regs[gen->num_imm][ch] = reg; - - /* emit initializer instruction */ - spe_load_float(gen->f, reg, val); - } - } - - gen->num_imm++; - - return TRUE; -} - - - -/** - * Emit "code" for a TGSI declaration. - * We only care about TGSI TEMPORARY register declarations at this time. 
- * For each TGSI TEMPORARY we allocate four SPE registers. - */ -static boolean -emit_declaration(struct cell_context *cell, - struct codegen *gen, const struct tgsi_full_declaration *decl) -{ - int i, ch; - - switch (decl->Declaration.File) { - case TGSI_FILE_TEMPORARY: - for (i = decl->Range.First; - i <= decl->Range.Last; - i++) { - assert(i < MAX_TEMPS); - for (ch = 0; ch < 4; ch++) { - gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); - if (gen->temp_regs[i][ch] < 0) - return FALSE; /* out of regs */ - } - - /* XXX if we run out of SPE registers, we need to spill - * to SPU memory. someday... - */ - - { - char buf[100]; - sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, - gen->temp_regs[i][0], gen->temp_regs[i][1], - gen->temp_regs[i][2], gen->temp_regs[i][3]); - spe_comment(gen->f, 0, buf); - } - } - break; - default: - ; /* ignore */ - } - - return TRUE; -} - - - -/** - * Translate TGSI shader code to SPE instructions. This is done when - * the state tracker gives us a new shader (via pipe->create_fs_state()). - * - * \param cell the rendering context (in) - * \param tokens the TGSI shader (in) - * \param f the generated function (out) - */ -boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f) -{ - struct tgsi_parse_context parse; - struct codegen gen; - uint ic = 0; - - memset(&gen, 0, sizeof(gen)); - gen.cell = cell; - gen.f = f; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ - gen.inputs_reg = 3; /* pointer to inputs array */ - gen.outputs_reg = 4; /* pointer to outputs array */ - gen.constants_reg = 5; /* pointer to constants array */ - - spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - spe_allocate_register(f, gen.inputs_reg); - spe_allocate_register(f, gen.outputs_reg); - spe_allocate_register(f, gen.constants_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 2*8); - printf("Begin %s\n", __FUNCTION__); - tgsi_dump(tokens, 0); - } - - tgsi_parse_init(&parse, tokens); - - emit_prologue(&gen); - - while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_immediate(&parse.FullToken.FullImmediate); - } - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_declaration(&parse.FullToken.FullDeclaration); - } - if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (f->print) { - _debug_printf(" # "); - ic++; - tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); - } - if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) - gen.error = TRUE; - break; - - default: - assert(0); - } - } - - if (gen.error) { - /* terminate the SPE code */ - return emit_END(&gen); - } - - if (cell->debug_flags & CELL_DEBUG_ASM) { - printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); - printf("End %s\n", __FUNCTION__); - } - - tgsi_parse_free( &parse ); - - return !gen.error; -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h deleted file mode 100644 index 99faea70462..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h +++ /dev/null @@ 
-1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef CELL_GEN_FP_H -#define CELL_GEN_FP_H - - - -extern boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f); - - -#endif /* CELL_GEN_FP_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c deleted file mode 100644 index 76a85178007..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ /dev/null @@ -1,2189 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Generate SPU per-fragment code (actually per-quad code). - * \author Brian Paul - * \author Bob Ellison - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" - - - -/** Do extra optimizations? */ -#define OPTIMIZATIONS 1 - - -/** - * Generate SPE code to perform Z/depth testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. - * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param ifragZ_reg register containing integer fragment Z values (in) - * \param ifbZ_reg register containing integer frame buffer Z values (in/out) - * \param zmask_reg register containing result of Z test/comparison (out) - * - * Returns TRUE if the Z-buffer needs to be updated. - */ -static boolean -gen_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) -{ - /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ - * quantities. This only makes a difference for 32-bit Z values though. 
- */ - ASSERT(dsa->depth.enabled); - - switch (dsa->depth.func) { - case PIPE_FUNC_EQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GREATER: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LESS: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ - spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ - break; - - case PIPE_FUNC_ALWAYS: - /* mask unchanged */ - spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ - break; - - default: - ASSERT(0); - break; - } - - if (dsa->depth.writemask) { - /* - * If (ztest passed) { - * framebufferZ = fragmentZ; - * } - * OR, - * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; - */ - spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); - return TRUE; - } - - return FALSE; -} - - -/** - * Generate SPE code to perform alpha testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. 
- * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param fragA_reg register containing four fragment alpha values (in) - */ -static void -gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask_reg, int fragA_reg) -{ - int ref_reg = spe_allocate_available_register(f); - int amask_reg = spe_allocate_available_register(f); - - ASSERT(dsa->alpha.enabled); - - if ((dsa->alpha.func != PIPE_FUNC_NEVER) && - (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - /* load/splat the alpha reference float value */ - spe_load_float(f, ref_reg, dsa->alpha.ref_value); - } - - /* emit code to do the alpha comparison, updating 'mask' */ - switch (dsa->alpha.func) { - case PIPE_FUNC_EQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GREATER: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LESS: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ - break; - - case PIPE_FUNC_ALWAYS: - /* no-op, mask unchanged */ - break; - - 
default: - ASSERT(0); - break; - } - -#if OPTIMIZATIONS - /* if mask == {0,0,0,0} we're all done, return */ - { - /* re-use amask reg here */ - int tmp_reg = amask_reg; - /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ - spe_orx(f, tmp_reg, mask_reg); - /* if tmp[0] == 0 then return from function call */ - spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); - } -#endif - - spe_release_register(f, ref_reg); - spe_release_register(f, amask_reg); -} - - -/** - * This pair of functions is used inline to allocate and deallocate - * optional constant registers. Once a constant is discovered to be - * needed, we will likely need it again, so we don't want to deallocate - * it and have to allocate and load it again unnecessarily. - */ -static INLINE void -setup_optional_register(struct spe_function *f, - int *r) -{ - if (*r < 0) - *r = spe_allocate_available_register(f); -} - -static INLINE void -release_optional_register(struct spe_function *f, - int r) -{ - if (r >= 0) - spe_release_register(f, r); -} - -static INLINE void -setup_const_register(struct spe_function *f, - int *r, - float value) -{ - if (*r >= 0) - return; - setup_optional_register(f, r); - spe_load_float(f, *r, value); -} - -static INLINE void -release_const_register(struct spe_function *f, - int r) -{ - release_optional_register(f, r); -} - - - -/** - * Unpack/convert framebuffer colors from four 32-bit packed colors - * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). - * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
- */ -static void -unpack_colors(struct spe_function *f, - enum pipe_format color_format, - int fbRGBA_reg, - int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) -{ - int mask0_reg = spe_allocate_available_register(f); - int mask1_reg = spe_allocate_available_register(f); - int mask2_reg = spe_allocate_available_register(f); - int mask3_reg = spe_allocate_available_register(f); - - spe_load_int(f, mask0_reg, 0xff); - spe_load_int(f, mask1_reg, 0xff00); - spe_load_int(f, mask2_reg, 0xff0000); - spe_load_int(f, mask3_reg, 0xff000000); - - spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); - - switch (color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); - - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); - - /* fbG = fbG >> 8 */ - spe_roti(f, fbG_reg, fbG_reg, -8); - - /* fbR = fbR >> 16 */ - spe_roti(f, fbR_reg, fbR_reg, -16); - - /* fbA = fbA >> 24 */ - spe_roti(f, fbA_reg, fbA_reg, -24); - break; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); - - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); - - /* fbR = fbR >> 8 */ - spe_roti(f, fbR_reg, fbR_reg, -8); - - /* fbG = fbG >> 16 */ - spe_roti(f, fbG_reg, fbG_reg, -16); - - /* fbB = fbB >> 24 */ - spe_roti(f, fbB_reg, fbB_reg, -24); - break; - - default: - ASSERT(0); - } - - /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ - spe_cuflt(f, fbR_reg, fbR_reg, 8); - spe_cuflt(f, fbG_reg, fbG_reg, 8); - spe_cuflt(f, fbB_reg, fbB_reg, 8); - spe_cuflt(f, fbA_reg, fbA_reg, 8); - - spe_release_register(f, mask0_reg); - 
spe_release_register(f, mask1_reg); - spe_release_register(f, mask2_reg); - spe_release_register(f, mask3_reg); -} - - -/** - * Generate SPE code to implement the given blend mode for a quad of pixels. - * \param f SPE function to append instruction onto. - * \param fragR_reg register with fragment red values (float) (in/out) - * \param fragG_reg register with fragment green values (float) (in/out) - * \param fragB_reg register with fragment blue values (float) (in/out) - * \param fragA_reg register with fragment alpha values (float) (in/out) - * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) - */ -static void -gen_blend(const struct pipe_blend_state *blend, - const struct pipe_blend_color *blend_color, - struct spe_function *f, - enum pipe_format color_format, - int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, - int fbRGBA_reg) -{ - int term1R_reg = spe_allocate_available_register(f); - int term1G_reg = spe_allocate_available_register(f); - int term1B_reg = spe_allocate_available_register(f); - int term1A_reg = spe_allocate_available_register(f); - - int term2R_reg = spe_allocate_available_register(f); - int term2G_reg = spe_allocate_available_register(f); - int term2B_reg = spe_allocate_available_register(f); - int term2A_reg = spe_allocate_available_register(f); - - int fbR_reg = spe_allocate_available_register(f); - int fbG_reg = spe_allocate_available_register(f); - int fbB_reg = spe_allocate_available_register(f); - int fbA_reg = spe_allocate_available_register(f); - - int tmp_reg = spe_allocate_available_register(f); - - /* Optional constant registers we might or might not end up using; - * if we do use them, make sure we only allocate them once by - * keeping a flag on each one. 
- */ - int one_reg = -1; - int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; - - ASSERT(blend->rt[0].blend_enable); - - /* packed RGBA -> float colors */ - unpack_colors(f, color_format, fbRGBA_reg, - fbR_reg, fbG_reg, fbB_reg, fbA_reg); - - /* - * Compute Src RGB terms. We're actually looking for the value - * of (the appropriate RGB factors) * (the incoming source RGB color), - * because in some cases (like PIPE_BLENDFACTOR_ONE and - * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. - */ - switch (blend->rt[0].rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (R,G,B) */ - spe_move(f, term1R_reg, fragR_reg); - spe_move(f, term1G_reg, fragG_reg); - spe_move(f, term1B_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factors = (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term1R_reg, 0.0f); - spe_load_float(f, term1G_reg, 0.0f); - spe_load_float(f, term1B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ - spe_fm(f, term1R_reg, fragR_reg, fragR_reg); - spe_fm(f, term1G_reg, fragG_reg, fragG_reg); - spe_fm(f, term1B_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ - spe_fm(f, term1R_reg, fragR_reg, fragA_reg); - spe_fm(f, term1G_reg, fragG_reg, fragA_reg); - spe_fm(f, term1B_reg, fragB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) - * or in other words term = (R-R*R, G-G*G, B-B*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term1R_reg, fragR_reg, fbR_reg); - spe_fm(f, 
term1G_reg, fragG_reg, fbG_reg); - spe_fm(f, term1B_reg, fragB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) - * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) - * or term = (R-R*A,G-G*A,B-B*A) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ - spe_fm(f, term1R_reg, fragR_reg, fbA_reg); - spe_fm(f, term1G_reg, fragG_reg, fbA_reg); - spe_fm(f, term1B_reg, fragB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) - * or term = (R-R*Afb,G-G*Afb,b-B*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ - spe_fm(f, term1R_reg, fragR_reg, constR_reg); - spe_fm(f, term1G_reg, fragG_reg, constG_reg); - spe_fm(f, term1B_reg, fragB_reg, constB_reg); - break; - case 
PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ - spe_fm(f, term1R_reg, fragR_reg, constA_reg); - spe_fm(f, term1G_reg, fragG_reg, constA_reg); - spe_fm(f, term1B_reg, fragB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) - * or term = (R-R*Rc, G-G*Gc, B-B*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) - * or term = (R-R*Ac,G-G*Ac,B-B*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* We'll need the optional {1,1,1,1} register */ - setup_const_register(f, &one_reg, 1.0f); - /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so - * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) - * We could expand the term (as a*min(b,c) == min(a*b,a*c) - * as long as a is positive), but then we'd have to do three - * 
spe_float_min() functions instead of one, so this is simpler. - */ - /* tmp = 1 - Afb */ - spe_fs(f, tmp_reg, one_reg, fbA_reg); - /* tmp = min(A,tmp) */ - spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); - /* term = R*tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Src Alpha term. Like the above, we're looking for - * the full term A*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. - */ - switch (blend->rt[0].alpha_src_factor) { - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term1A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = A */ - spe_move(f, term1A_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = A*A */ - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = A*(1-A) = A-A*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = A*Afb */ - spe_fm(f, term1A_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 
1-Afb, so term = A*(1-Afb) = A - A*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = A*Ac */ - spe_fm(f, term1A_reg, fragA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Compute Dest RGB term. Like the above, we're looking for - * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ - spe_move(f, term2R_reg, fbR_reg); - spe_move(f, term2G_reg, fbG_reg); - spe_move(f, term2B_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor s= (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term2R_reg, 0.0f); - spe_load_float(f, term2G_reg, 0.0f); - spe_load_float(f, term2B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fragR_reg); - spe_fm(f, term2G_reg, fbG_reg, fragG_reg); - spe_fm(f, term2B_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) - * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ - spe_fm(f, term2R_reg, fbR_reg, fragA_reg); - spe_fm(f, term2G_reg, fbG_reg, fragA_reg); - spe_fm(f, term2B_reg, fbB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fbR_reg); - spe_fm(f, term2G_reg, fbG_reg, fbG_reg); - spe_fm(f, term2B_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = 
(Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) - * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ - spe_fm(f, term2R_reg, fbR_reg, fbA_reg); - spe_fm(f, term2G_reg, fbG_reg, fbA_reg); - spe_fm(f, term2B_reg, fbB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) - * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ - spe_fm(f, term2R_reg, fbR_reg, constR_reg); - spe_fm(f, term2G_reg, fbG_reg, constG_reg); - spe_fm(f, term2B_reg, fbB_reg, constB_reg); - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ - spe_fm(f, term2R_reg, fbR_reg, constA_reg); - spe_fm(f, term2G_reg, fbG_reg, constA_reg); - spe_fm(f, term2B_reg, fbB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, 
&constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) - * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) - * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Dest Alpha term. Like the above, we're looking for - * the full term Afb*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = Afb */ - spe_move(f, term2A_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term2A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = Afb*A */ - spe_fm(f, term2A_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = Afb*Afb */ - spe_fm(f, term2A_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = Afb*Ac */ - spe_fm(f, term2A_reg, fbA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second 
color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Combine Src/Dest RGB terms as per the blend equation. - */ - switch (blend->rt[0].rgb_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragR_reg, term1R_reg, term2R_reg); - spe_fa(f, fragG_reg, term1G_reg, term2G_reg); - spe_fa(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragR_reg, term1R_reg, term2R_reg); - spe_fs(f, fragG_reg, term1G_reg, term2G_reg); - spe_fs(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragR_reg, term2R_reg, term1R_reg); - spe_fs(f, fragG_reg, term2G_reg, term1G_reg); - spe_fs(f, fragB_reg, term2B_reg, term1B_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); - break; - default: - ASSERT(0); - } - - /* - * Combine Src/Dest A term - */ - switch (blend->rt[0].alpha_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragA_reg, term2A_reg, term1A_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); - break; - default: - ASSERT(0); - } - - spe_release_register(f, term1R_reg); - spe_release_register(f, term1G_reg); - spe_release_register(f, 
term1B_reg); - spe_release_register(f, term1A_reg); - - spe_release_register(f, term2R_reg); - spe_release_register(f, term2G_reg); - spe_release_register(f, term2B_reg); - spe_release_register(f, term2A_reg); - - spe_release_register(f, fbR_reg); - spe_release_register(f, fbG_reg); - spe_release_register(f, fbB_reg); - spe_release_register(f, fbA_reg); - - spe_release_register(f, tmp_reg); - - /* Free any optional registers that actually got used */ - release_const_register(f, one_reg); - release_const_register(f, constR_reg); - release_const_register(f, constG_reg); - release_const_register(f, constB_reg); - release_const_register(f, constA_reg); -} - - -static void -gen_logicop(const struct pipe_blend_state *blend, - struct spe_function *f, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. - * */ - ASSERT(blend->logicop_enable); - - switch(blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: /* 0 */ - spe_zero(f, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOR: /* ~(s | d) */ - spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_INVERT: /* ~d */ - /* Note that (A nor A) == ~(A|A) == ~A */ - spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_XOR: /* s ^ d */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_NAND: /* ~(s & d) */ - spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND: /* s & d */ - spe_and(f, 
fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOOP: /* d */ - spe_move(f, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY: /* s */ - break; - case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR: /* s | d */ - spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_SET: /* 1 */ - spe_load_int(f, fragRGBA_reg, 0xffffffff); - break; - default: - ASSERT(0); - } -} - - -/** - * Generate code to pack a quad of float colors into four 32-bit integers. - * - * \param f SPE function to append instruction onto. - * \param color_format the dest color packing format - * \param r_reg register containing four red values (in/clobbered) - * \param g_reg register containing four green values (in/clobbered) - * \param b_reg register containing four blue values (in/clobbered) - * \param a_reg register containing four alpha values (in/clobbered) - * \param rgba_reg register to store the packed RGBA colors (out) - */ -static void -gen_pack_colors(struct spe_function *f, - enum pipe_format color_format, - int r_reg, int g_reg, int b_reg, int a_reg, - int rgba_reg) -{ - int rg_reg = spe_allocate_available_register(f); - int ba_reg = spe_allocate_available_register(f); - - /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ - spe_cfltu(f, r_reg, r_reg, 32); - spe_cfltu(f, g_reg, g_reg, 32); - spe_cfltu(f, b_reg, b_reg, 32); - spe_cfltu(f, a_reg, a_reg, 32); - - /* Shift the most significant bytes to the least significant positions. 
- * I.e.: reg = reg >> 24 - */ - spe_rotmi(f, r_reg, r_reg, -24); - spe_rotmi(f, g_reg, g_reg, -24); - spe_rotmi(f, b_reg, b_reg, -24); - spe_rotmi(f, a_reg, a_reg, -24); - - /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { - spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ - spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ - spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ - } - else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { - spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ - spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ - spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ - } - else { - ASSERT(0); - } - - /* Merge red, green, blue, alpha registers to make packed RGBA colors. - * Eg: after shifting according to color_format we might have: - * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} - * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} - * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} - * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} - * OR-ing all those together gives us four packed colors: - * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} - */ - spe_or(f, rg_reg, r_reg, g_reg); - spe_or(f, ba_reg, a_reg, b_reg); - spe_or(f, rgba_reg, rg_reg, ba_reg); - - spe_release_register(f, rg_reg); - spe_release_register(f, ba_reg); -} - - -static void -gen_colormask(struct spe_function *f, - uint colormask, - enum pipe_format color_format, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. Further, the pixels - * are packed according to the given color format, not - * necessarily RGBA... - */ - uint r_mask; - uint g_mask; - uint b_mask; - uint a_mask; - - /* Calculate exactly where the bits for any particular color - * end up, so we can mask them correctly. 
- */ - switch(color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* ARGB */ - a_mask = 0xff000000; - r_mask = 0x00ff0000; - g_mask = 0x0000ff00; - b_mask = 0x000000ff; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* BGRA */ - b_mask = 0xff000000; - g_mask = 0x00ff0000; - r_mask = 0x0000ff00; - a_mask = 0x000000ff; - break; - default: - ASSERT(0); - } - - /* For each R, G, B, and A component we're supposed to mask out, - * clear its bits. Then our mask operation later will work - * as expected. - */ - if (!(colormask & PIPE_MASK_R)) { - r_mask = 0; - } - if (!(colormask & PIPE_MASK_G)) { - g_mask = 0; - } - if (!(colormask & PIPE_MASK_B)) { - b_mask = 0; - } - if (!(colormask & PIPE_MASK_A)) { - a_mask = 0; - } - - /* Get a temporary register to hold the mask that will be applied - * to the fragment - */ - int colormask_reg = spe_allocate_available_register(f); - - /* The actual mask we're going to use is an OR of the remaining R, G, B, - * and A masks. Load the result value into our temporary register. - */ - spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); - - /* Use the mask register to select between the fragment color - * values and the frame buffer color values. Wherever the - * mask has a 0 bit, the current frame buffer color should override - * the fragment color. Wherever the mask has a 1 bit, the - * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) - * instruction will select bits from its first operand rA wherever the - * the mask bits rM are 0, and from its second operand rB wherever the - * mask bits rM are 1. That means that the frame buffer color is the - * first operand, and the fragment color the second. 
- */ - spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); - - /* Release the temporary register and we're done */ - spe_release_register(f, colormask_reg); -} - - -/** - * This function is annoyingly similar to gen_depth_test(), above, except - * that instead of comparing two varying values (i.e. fragment and buffer), - * we're comparing a varying value with a static value. As such, we have - * access to the Compare Immediate instructions where we don't in - * gen_depth_test(), which is what makes us very different. - * - * There's some added complexity if there's a non-trivial state->mask - * value; then stencil and reference both must be masked - * - * The return value in the stencil_pass_reg is a bitmask of valid - * fragments that also passed the stencil test. The bitmask of valid - * fragments that failed would be found in - * (fragment_mask_reg & ~stencil_pass_reg). - */ -static void -gen_stencil_test(struct spe_function *f, - const struct pipe_stencil_state *state, - const unsigned ref_value, - uint stencil_max_value, - int fragment_mask_reg, - int fbS_reg, - int stencil_pass_reg) -{ - /* Generate code that puts the set of passing fragments into the - * stencil_pass_reg register, taking into account whether each fragment - * was active to begin with. 
- */ - switch (state->func) { - case PIPE_FUNC_EQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ - uint tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NOTEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & ~(s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LESS: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference < s) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - 
state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GREATER: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference > s) */ - /* There's no convenient Compare Less Than Immediate instruction, so - * we'll have to do this one the harder way, by loading a register and - * comparing directly. Compare Logical Greater Than Word (clgt) - * treats its operands as unsigned - no sign extension. - */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, state->valuemask & ref_value); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference >= s) - * = fragment_mask & ~(s > reference) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, - ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, 
stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference <= s) ] - * = fragment_mask & ~(reference > s) */ - /* As above, we have to do this by loading a register */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value & state->valuemask); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NEVER: - /* stencil_pass = fragment_mask & 0 = 0 */ - spe_load_uint(f, stencil_pass_reg, 0); - break; - - case PIPE_FUNC_ALWAYS: - /* stencil_pass = fragment_mask & 1 = fragment_mask */ - spe_move(f, stencil_pass_reg, fragment_mask_reg); - break; - } - - /* The fragments that passed the stencil test are now in stencil_pass_reg. - * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). - */ -} - - -/** - * This function generates code that calculates a set of new stencil values - * given the earlier values and the operation to apply. It does not - * apply any tests. 
It is intended to be called up to 3 times - * (for the stencil fail operation, for the stencil pass-z fail operation, - * and for the stencil pass-z pass operation) to collect up to three - * possible sets of values, and for the caller to combine them based - * on the result of the tests. - * - * stencil_max_value should be (2^n - 1) where n is the number of bits - * in the stencil buffer - in other words, it should be usable as a mask. - */ -static void -gen_stencil_values(struct spe_function *f, - uint stencil_op, - uint stencil_ref_value, - uint stencil_max_value, - int fbS_reg, - int newS_reg) -{ - /* The code below assumes that newS_reg and fbS_reg are not the same - * register; if they can be, the calculations below will have to use - * an additional temporary register. For now, mark the assumption - * with an assertion that will fail if they are the same. - */ - ASSERT(fbS_reg != newS_reg); - - /* The code also assumes that the stencil_max_value is of the form - * 2^n-1 and can therefore be used as a mask for the valid bits in - * addition to a maximum. Make sure this is the case as well. - * The clever math below exploits the fact that incrementing a - * binary number serves to flip all the bits of a number starting at - * the LSB and continuing to (and including) the first zero bit - * found. That means that a number and its increment will always - * have at least one bit in common (the high order bit, if nothing - * else) *unless* the number is zero, *or* the number is of a form - * consisting of some number of 1s in the low-order bits followed - * by nothing but 0s in the high-order bits. The latter case - * implies it's of the form 2^n-1. 
- */ - ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); - - switch(stencil_op) { - case PIPE_STENCIL_OP_KEEP: - /* newS = S */ - spe_move(f, newS_reg, fbS_reg); - break; - - case PIPE_STENCIL_OP_ZERO: - /* newS = 0 */ - spe_zero(f, newS_reg); - break; - - case PIPE_STENCIL_OP_REPLACE: - /* newS = stencil reference value */ - spe_load_uint(f, newS_reg, stencil_ref_value); - break; - - case PIPE_STENCIL_OP_INCR: { - /* newS = (s == max ? max : s + 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); - /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ - spe_ai(f, newS_reg, fbS_reg, 1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_DECR: { - /* newS = (s == 0 ? 0 : s - 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); - /* Add Word Immediate with a (-1) value works */ - spe_ai(f, newS_reg, fbS_reg, -1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_INCR_WRAP: - /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can - * do a normal add and mask off the correct bits - */ - spe_ai(f, newS_reg, fbS_reg, 1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_DECR_WRAP: - /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ - spe_ai(f, newS_reg, fbS_reg, -1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_INVERT: - /* newS = ~s. 
We take advantage of the mask/max value to invert only - * the valid bits for the field so we don't have to do an extra "and". - */ - spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); - break; - - default: - ASSERT(0); - } -} - - -/** - * This function generates code to get all the necessary possible - * stencil values. For each of the output registers (fail_reg, - * zfail_reg, and zpass_reg), it either allocates a new register - * and calculates a new set of values based on the stencil operation, - * or it reuses a register allocation and calculation done for an - * earlier (matching) operation, or it reuses the fbS_reg register - * (if the stencil operation is KEEP, which doesn't change the - * stencil buffer). - * - * Since this function allocates a variable number of registers, - * to avoid incurring complex logic to free them, they should - * be allocated after a spe_allocate_register_set() call - * and released by the corresponding spe_release_register_set() call. - */ -static void -gen_get_stencil_values(struct spe_function *f, - const struct pipe_stencil_state *stencil, - const unsigned ref_value, - const uint depth_enabled, - int fbS_reg, - int *fail_reg, - int *zfail_reg, - int *zpass_reg) -{ - uint zfail_op; - - /* Stenciling had better be enabled here */ - ASSERT(stencil->enabled); - - /* If the depth test is not enabled, it is treated as though it always - * passes, which means that the zfail_op is not considered - a - * failing stencil test triggers the fail_op, and a passing one - * triggers the zpass_op - * - * As an optimization, override calculation of the zfail_op values - * if they aren't going to be used. By setting the value of - * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed - * to match the incoming stencil values, and no calculation will - * be done. 
- */ - if (depth_enabled) { - zfail_op = stencil->zfail_op; - } - else { - zfail_op = PIPE_STENCIL_OP_KEEP; - } - - /* One-sided or front-facing stencil */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { - *fail_reg = fbS_reg; - } - else { - *fail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->fail_op, ref_value, - 0xff, fbS_reg, *fail_reg); - } - - /* Check the possibly overridden value, not the structure value */ - if (zfail_op == PIPE_STENCIL_OP_KEEP) { - *zfail_reg = fbS_reg; - } - else if (zfail_op == stencil->fail_op) { - *zfail_reg = *fail_reg; - } - else { - *zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zfail_op, ref_value, - 0xff, fbS_reg, *zfail_reg); - } - - if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - *zpass_reg = fbS_reg; - } - else if (stencil->zpass_op == stencil->fail_op) { - *zpass_reg = *fail_reg; - } - else if (stencil->zpass_op == zfail_op) { - *zpass_reg = *zfail_reg; - } - else { - *zpass_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zpass_op, ref_value, - 0xff, fbS_reg, *zpass_reg); - } -} - -/** - * Note that fbZ_reg may *not* be set on entry, if in fact - * the depth test is not enabled. This function must not use - * the register if depth is not enabled. - */ -static boolean -gen_stencil_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - const uint facing, - const int mask_reg, const int fragZ_reg, - const int fbZ_reg, const int fbS_reg) -{ - /* True if we've generated code that could require writeback to the - * depth and/or stencil buffers - */ - boolean modified_buffers = FALSE; - - boolean need_to_calculate_stencil_values; - boolean need_to_writemask_stencil_values; - - struct pipe_stencil_state *stencil; - - /* Registers. We may or may not actually allocate these, depending - * on whether the state values indicate that we need them. 
- */ - int stencil_pass_reg, stencil_fail_reg; - int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; - int stencil_writemask_reg; - int zmask_reg; - int newS_reg; - unsigned ref_value; - - /* Stenciling is quite complex: up to six different configurable stencil - * operations/calculations can be required (three each for front-facing - * and back-facing fragments). Many of those operations will likely - * be identical, so there's good reason to try to avoid calculating - * the same values more than once (which unfortunately makes the code less - * straightforward). - * - * To make register management easier, we start a new - * register set; we can release all the registers in the set at - * once, and avoid having to keep track of exactly which registers - * we allocate. We can still allocate and free registers as - * desired (if we know we no longer need a register), but we don't - * have to spend the complexity to track the more difficult variant - * register usage scenarios. - */ - spe_comment(f, 0, "Allocating stencil register set"); - spe_allocate_register_set(f); - - /* The facing we're given is the fragment facing; it doesn't - * exactly match the stencil facing. If stencil is enabled, - * but two-sided stencil is *not* enabled, we use the same - * stencil settings for both front- and back-facing fragments. - * We only use the "back-facing" stencil for backfacing fragments - * if two-sided stenciling is enabled. - */ - if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { - stencil = &dsa->stencil[1]; - ref_value = stencil_ref->ref_value[1]; - } - else { - stencil = &dsa->stencil[0]; - ref_value = stencil_ref->ref_value[0]; - } - - /* Calculate the writemask. 
If the writemask is trivial (either - * all 0s, meaning that we don't need to calculate any stencil values - * because they're not going to change the stencil anyway, or all 1s, - * meaning that we have to calculate the stencil values but do not - * need to mask them), we can avoid generating code. Don't forget - * that we need to consider backfacing stencil, if enabled. - * - * Note that if the backface stencil is *not* enabled, the backface - * stencil will have the same values as the frontface stencil. - */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && - stencil->zfail_op == PIPE_STENCIL_OP_KEEP && - stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0x0) { - /* All changes are writemasked out, so no need to calculate - * what those changes might be, and no need to write anything back. - */ - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0xff) { - /* Still trivial, but a little less so. We need to write the stencil - * values, but we don't need to mask them. - */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = FALSE; - } - else { - /* The general case: calculate, mask, and write */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = TRUE; - - /* While we're here, generate code that calculates what the - * writemask should be. If backface stenciling is enabled, - * and the backface writemask is not the same as the frontface - * writemask, we'll have to generate code that merges the - * two masks into a single effective mask based on fragment facing. - */ - spe_comment(f, 0, "Computing stencil writemask"); - stencil_writemask_reg = spe_allocate_available_register(f); - spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); - } - - /* At least one-sided stenciling must be on. 
Generate code that - * runs the stencil test on the basic/front-facing stencil, leaving - * the mask of passing stencil bits in stencil_pass_reg. This mask will - * be used both to mask the set of active pixels, and also to - * determine how the stencil buffer changes. - * - * This test will *not* change the value in mask_reg (because we don't - * yet know whether to apply the two-sided stencil or one-sided stencil). - */ - spe_comment(f, 0, "Running basic stencil test"); - stencil_pass_reg = spe_allocate_available_register(f); - gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg); - - /* Generate code that, given the mask of valid fragments and the - * mask of valid fragments that passed the stencil test, computes - * the mask of valid fragments that failed the stencil test. We - * have to do this before we run a depth test (because the - * depth test should not be performed on fragments that failed the - * stencil test, and because the depth test will update the - * mask of valid fragments based on the results of the depth test). - */ - spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); - stencil_fail_reg = spe_allocate_available_register(f); - spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); - /* Now remove the stenciled-out pixels from the valid fragment mask, - * so we can later use the valid fragment mask in the depth test. - */ - spe_and(f, mask_reg, mask_reg, stencil_pass_reg); - - /* We may not need to calculate stencil values, if the writemask is off */ - if (need_to_calculate_stencil_values) { - /* Generate code that calculates exactly which stencil values we need, - * without calculating the same value twice (say, if two different - * stencil ops have the same value). 
This code will work for one-sided - * and two-sided stenciling (so that we take into account that operations - * may match between front and back stencils), and will also take into - * account whether the depth test is enabled (if the depth test is off, - * we don't need any of the zfail results, because the depth test always - * is considered to pass if it is disabled). Any register value that - * does not need to be calculated will come back with the same value - * that's in fbS_reg. - * - * This function will allocate a variant number of registers that - * will be released as part of the register set. - */ - spe_comment(f, 0, facing == CELL_FACING_FRONT - ? "Computing front-facing stencil values" - : "Computing back-facing stencil values"); - gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg, - &stencil_fail_values, &stencil_pass_depth_fail_values, - &stencil_pass_depth_pass_values); - } - - /* We now have all the stencil values we need. We also need - * the results of the depth test to figure out which - * stencil values will become the new stencil values. (Even if - * we aren't actually calculating stencil values, we need to apply - * the depth test if it's enabled.) - * - * The code generated by gen_depth_test() returns the results of the - * test in the given register, but also alters the mask_reg based - * on the results of the test. - */ - if (dsa->depth.enabled) { - spe_comment(f, 0, "Running stencil depth test"); - zmask_reg = spe_allocate_available_register(f); - modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - } - - if (need_to_calculate_stencil_values) { - - /* If we need to writemask the stencil values before going into - * the stencil buffer, we'll have to use a new register to - * hold the new values. If not, we can just keep using the - * current register. 
- */ - if (need_to_writemask_stencil_values) { - newS_reg = spe_allocate_available_register(f); - spe_comment(f, 0, "Saving current stencil values for writemasking"); - spe_move(f, newS_reg, fbS_reg); - } - else { - newS_reg = fbS_reg; - } - - /* Merge in the selected stencil fail values */ - if (stencil_fail_values != fbS_reg) { - spe_comment(f, 0, "Loading stencil fail values"); - spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth fail values. If this calculation - * is not needed (say, if depth test is off), then the - * stencil_pass_depth_fail_values register will be equal to fbS_reg - * and we'll skip the calculation. - */ - if (stencil_pass_depth_fail_values != fbS_reg) { - /* We don't actually have a stencil pass/depth fail mask yet. - * Calculate it here from the stencil passing mask and the - * depth passing mask. Note that zmask_reg *must* have been - * set above if we're here. - */ - uint stencil_pass_depth_fail_mask = - spe_allocate_available_register(f); - - spe_comment(f, 0, "Loading stencil pass/depth fail values"); - spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); - - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, - stencil_pass_depth_fail_mask); - - spe_release_register(f, stencil_pass_depth_fail_mask); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth pass mask. Note that we - * *can* get here with zmask_reg being unset (if the depth - * test is off but the stencil test is on). In this case, - * we assume the depth test passes, and don't need to mask - * the stencil pass mask with the Z mask. 
- */ - if (stencil_pass_depth_pass_values != fbS_reg) { - if (dsa->depth.enabled) { - uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); - /* We'll need a separate register */ - spe_comment(f, 0, "Loading stencil pass/depth pass values"); - spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); - spe_release_register(f, stencil_pass_depth_pass_mask); - } - else { - /* We can use the same stencil-pass register */ - spe_comment(f, 0, "Loading stencil pass values"); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); - } - modified_buffers = TRUE; - } - - /* Almost done. If we need to writemask, do it now, leaving the - * results in the fbS_reg register passed in. If we don't need - * to writemask, then the results are *already* in the fbS_reg, - * so there's nothing more to do. - */ - - if (need_to_writemask_stencil_values && modified_buffers) { - /* The Select Bytes command makes a fine writemask. Where - * the mask is 0, the first (original) values are retained, - * effectively masking out changes. Where the mask is 1, the - * second (new) values are retained, incorporating changes. - */ - spe_comment(f, 0, "Writemasking new stencil values"); - spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); - } - - } /* done calculating stencil values */ - - /* The stencil and/or depth values have been applied, and the - * mask_reg, fbS_reg, and fbZ_reg values have been updated. - * We're all done, except that we've allocated a fair number - * of registers that we didn't bother tracking. Release all - * those registers as part of the register set, and go home. - */ - spe_comment(f, 0, "Releasing stencil register set"); - spe_release_register_set(f); - - /* Return TRUE if we could have modified the stencil and/or - * depth buffers. 
- */ - return modified_buffers; -} - - -/** - * Generate depth and/or stencil test code. - * \param cell context - * \param dsa depth/stencil/alpha state - * \param f spe function to emit - * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK - * \param mask_reg register containing the pixel alive/dead mask - * \param depth_tile_reg register containing address of z/stencil tile - * \param quad_offset_reg offset to quad from start of tile - * \param fragZ_reg register containg fragment Z values - */ -static void -gen_depth_stencil(struct cell_context *cell, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - struct spe_function *f, - uint facing, - int mask_reg, - int depth_tile_reg, - int quad_offset_reg, - int fragZ_reg) - -{ - const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; - boolean write_depth_stencil; - - /* framebuffer's combined z/stencil values register */ - int fbZS_reg = spe_allocate_available_register(f); - - /* Framebufer Z values register */ - int fbZ_reg = spe_allocate_available_register(f); - - /* Framebuffer stencil values register (may not be used) */ - int fbS_reg = spe_allocate_available_register(f); - - /* 24-bit mask register (may not be used) */ - int zmask_reg = spe_allocate_available_register(f); - - /** - * The following code: - * 1. fetch quad of packed Z/S values from the framebuffer tile. - * 2. extract the separate the Z and S values from packed values - * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints - * - * The instructions for doing this are interleaved for better performance. 
- */ - spe_comment(f, 0, "Fetch Z/stencil quad from tile"); - - switch(zs_format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */ - case PIPE_FORMAT_Z24X8_UNORM: - /* prepare mask to extract Z vals from ZS vals */ - spe_load_uint(f, zmask_reg, 0x00ffffff); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by masking */ - spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); - - /* extract 8-bit stencil values by shifting */ - spe_rotmi(f, fbS_reg, fbZS_reg, -24); - break; - - case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */ - case PIPE_FORMAT_X8Z24_UNORM: - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by shifting */ - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); - - /* extract 8-bit stencil values by masking */ - spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); - break; - - case PIPE_FORMAT_Z32_UNORM: - /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* No stencil, so can't do anything there */ - break; - - case PIPE_FORMAT_Z16_UNORM: - /* XXX This code for 16bpp Z is broken! 
*/ - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* Copy over 4 32-bit values */ - spe_move(f, fbZ_reg, fbZS_reg); - - /* convert Z from [0,1] to 16-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - /* No stencil */ - break; - - default: - ASSERT(0); /* invalid format */ - } - - /* If stencil is enabled, use the stencil-specific code - * generator to generate both the stencil and depth (if needed) - * tests. Otherwise, if only depth is enabled, generate - * a quick depth test. The test generators themselves will - * report back whether the depth/stencil buffer has to be - * written back. - */ - if (dsa->stencil[0].enabled) { - /* This will perform the stencil and depth tests, and update - * the mask_reg, fbZ_reg, and fbS_reg as required by the - * tests. - */ - ASSERT(fbS_reg >= 0); - spe_comment(f, 0, "Perform stencil test"); - - /* Note that fbZ_reg may not be set on entry, if stenciling - * is enabled but there's no Z-buffer. The - * gen_stencil_depth_test() function must ignore the - * fbZ_reg register if depth is not enabled. - */ - write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing, - mask_reg, fragZ_reg, - fbZ_reg, fbS_reg); - } - else if (dsa->depth.enabled) { - int zmask_reg = spe_allocate_available_register(f); - ASSERT(fbZ_reg >= 0); - spe_comment(f, 0, "Perform depth test"); - write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - spe_release_register(f, zmask_reg); - } - else { - write_depth_stencil = FALSE; - } - - if (write_depth_stencil) { - /* Merge latest Z and Stencil values into fbZS_reg. - * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. - * fbS_reg has four 8-bit Z values in bits [7..0]. 
- */ - spe_comment(f, 0, "Store quad's depth/stencil values in tile"); - if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT) { - ASSERT(0); /* XXX to do */ - } - else { - ASSERT(0); /* bad zs_format */ - } - - /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ - spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - } - - /* Don't need these any more */ - spe_release_register(f, fbZS_reg); - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); - spe_release_register(f, zmask_reg); -} - - - -/** - * Generate SPE code to implement the fragment operations (alpha test, - * depth test, stencil test, blending, colormask, and final - * framebuffer write) as specified by the current context state. - * - * Logically, this code will be called after running the fragment - * shader. But under some circumstances we could run some of this - * code before the fragment shader to cull fragments/quads that are - * totally occluded/discarded. - * - * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now. - * - * See the spu_default_fragment_ops() function to see how the per-fragment - * operations would be done with ordinary C code. - * The code we generate here though has no branches, is SIMD, etc and - * should be much faster. 
- * - * \param cell the rendering context (in) - * \param facing whether the generated code is for front-facing or - * back-facing fragments - * \param f the generated function (in/out); on input, the function - * must already have been initialized. On exit, whatever - * instructions within the generated function have had - * the fragment ops appended. - */ -void -cell_gen_fragment_function(struct cell_context *cell, - const uint facing, - struct spe_function *f) -{ - const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; - const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref; - const struct pipe_blend_state *blend = cell->blend; - const struct pipe_blend_color *blend_color = &cell->blend_color; - const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ - const int x_reg = 3; /* uint */ - const int y_reg = 4; /* uint */ - const int color_tile_reg = 5; /* tile_t * */ - const int depth_tile_reg = 6; /* tile_t * */ - const int fragZ_reg = 7; /* vector float */ - const int fragR_reg = 8; /* vector float */ - const int fragG_reg = 9; /* vector float */ - const int fragB_reg = 10; /* vector float */ - const int fragA_reg = 11; /* vector float */ - const int mask_reg = 12; /* vector uint */ - - ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); - - /* offset of quad from start of tile - * XXX assuming 4-byte pixels for color AND Z/stencil!!!! - */ - int quad_offset_reg; - - int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 8); - spe_comment(f, -4, facing == CELL_FACING_FRONT - ? 
"Begin front-facing per-fragment ops" - : "Begin back-facing per-fragment ops"); - } - - spe_allocate_register(f, x_reg); - spe_allocate_register(f, y_reg); - spe_allocate_register(f, color_tile_reg); - spe_allocate_register(f, depth_tile_reg); - spe_allocate_register(f, fragZ_reg); - spe_allocate_register(f, fragR_reg); - spe_allocate_register(f, fragG_reg); - spe_allocate_register(f, fragB_reg); - spe_allocate_register(f, fragA_reg); - spe_allocate_register(f, mask_reg); - - quad_offset_reg = spe_allocate_available_register(f); - fbRGBA_reg = spe_allocate_available_register(f); - - /* compute offset of quad from start of tile, in bytes */ - { - int x2_reg = spe_allocate_available_register(f); - int y2_reg = spe_allocate_available_register(f); - - ASSERT(TILE_SIZE == 32); - - spe_comment(f, 0, "Compute quad offset within tile"); - spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ - spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ - spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ - spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ - spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ - - spe_release_register(f, x2_reg); - spe_release_register(f, y2_reg); - } - - /* Generate the alpha test, if needed. */ - if (dsa->alpha.enabled) { - gen_alpha_test(dsa, f, mask_reg, fragA_reg); - } - - /* generate depth and/or stencil test code */ - if (dsa->depth.enabled || dsa->stencil[0].enabled) { - gen_depth_stencil(cell, dsa, stencil_ref, f, - facing, - mask_reg, - depth_tile_reg, - quad_offset_reg, - fragZ_reg); - } - - /* Get framebuffer quad/colors. We'll need these for blending, - * color masking, and to obey the quad/pixel mask. - * Load: fbRGBA_reg = memory[color_tile + quad_offset] - * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking - * we could skip this load. 
- */ - spe_comment(f, 0, "Fetch quad colors from tile"); - spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - - if (blend->rt[0].blend_enable) { - spe_comment(f, 0, "Perform blending"); - gen_blend(blend, blend_color, f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); - } - - /* - * Write fragment colors to framebuffer/tile. - * This involves converting the fragment colors from float[4] to the - * tile's specific format and obeying the quad/pixel mask. - */ - { - int rgba_reg = spe_allocate_available_register(f); - - /* Pack four float colors as four 32-bit int colors */ - spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); - gen_pack_colors(f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, - rgba_reg); - - if (blend->logicop_enable) { - spe_comment(f, 0, "Compute logic op"); - gen_logicop(blend, f, rgba_reg, fbRGBA_reg); - } - - if (blend->rt[0].colormask != PIPE_MASK_RGBA) { - spe_comment(f, 0, "Compute color mask"); - gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg); - } - - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: - * if (mask[i]) - * rgba[i] = rgba[i]; - * else - * rgba[i] = framebuffer[i]; - */ - spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); - - /* Store updated quad in tile: - * memory[color_tile + quad_offset] = rgba_reg; - */ - spe_comment(f, 0, "Store quad colors into color tile"); - spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); - - spe_release_register(f, rgba_reg); - } - - //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); - - spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ - - spe_release_register(f, fbRGBA_reg); - spe_release_register(f, quad_offset_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - char buffer[1024]; - sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", - facing == CELL_FACING_FRONT ? 
"front" : "back", f->num_inst); - spe_comment(f, -4, buffer); - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h deleted file mode 100644 index 21b35d1fafe..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_GEN_FRAGMENT_H -#define CELL_GEN_FRAGMENT_H - - -extern void -cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); - - -#endif /* CELL_GEN_FRAGMENT_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c deleted file mode 100644 index 223adda48f0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ /dev/null @@ -1,473 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: - * Keith Whitwell <[email protected]> - * Brian Paul - */ - -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_flush.h" -#include "cell_pipe_state.h" -#include "cell_state.h" -#include "cell_texture.h" - - - -static void * -cell_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - return mem_dup(blend, sizeof(*blend)); -} - - -static void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend = (struct pipe_blend_state *) blend; - cell->dirty |= CELL_NEW_BLEND; -} - - -static void -cell_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - - -static void -cell_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *blend_color) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend_color = *blend_color; - - cell->dirty |= CELL_NEW_BLEND; -} - - - - -static void * -cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *dsa) -{ - return mem_dup(dsa, sizeof(*dsa)); -} - - -static void -cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *dsa) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; - cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) -{ - FREE(dsa); -} - - -static void -cell_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->stencil_ref = *stencil_ref; - - cell->dirty 
|= CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass the clip state to the draw module */ - draw_set_clip_state(cell->draw, clip); -} - - -static void -cell_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) -{ -} - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -static void -cell_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct cell_context *cell = cell_context(pipe); - - cell->viewport = *viewport; /* struct copy */ - cell->dirty |= CELL_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(cell->draw, viewport); - - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ -} - - -static void -cell_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->scissor, scissor, sizeof(*scissor) ); - cell->dirty |= CELL_NEW_SCISSOR; -} - - -static void -cell_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); - cell->dirty |= CELL_NEW_STIPPLE; -} - - - -static void * -cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - return mem_dup(rasterizer, sizeof(*rasterizer)); -} - - -static void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) -{ - struct pipe_rasterizer_state *rasterizer = - (struct pipe_rasterizer_state *) rast; - struct cell_context *cell = cell_context(pipe); - - /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, rasterizer, rast); - - 
cell->rasterizer = rasterizer; - - cell->dirty |= CELL_NEW_RASTERIZER; -} - - -static void -cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) -{ - FREE(rasterizer); -} - - - -static void * -cell_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - return mem_dup(sampler, sizeof(*sampler)); -} - - -static void -cell_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **samplers) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - draw_flush(cell->draw); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; - if (cell->sampler[i] != new_samp) { - cell->sampler[i] = new_samp; - changed |= (1 << i); - } - } - - if (changed) { - cell->dirty |= CELL_NEW_SAMPLER; - cell->dirty_samplers |= changed; - } -} - - -static void -cell_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE( sampler ); -} - - - -static void -cell_set_fragment_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_view *new_view = i < num ? views[i] : NULL; - struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i]; - - if (old_view != new_view) { - struct pipe_resource *new_tex = new_view ? 
new_view->texture : NULL; - - pipe_sampler_view_reference(&cell->fragment_sampler_views[i], - new_view); - pipe_resource_reference((struct pipe_resource **) &cell->texture[i], - (struct pipe_resource *) new_tex); - - changed |= (1 << i); - } - } - - cell->num_textures = num; - - if (changed) { - cell->dirty |= CELL_NEW_TEXTURE; - cell->dirty_textures |= changed; - } -} - - -static struct pipe_sampler_view * -cell_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) -{ - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; - } - - return view; -} - - -static void -cell_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) -{ - pipe_resource_reference(&view->texture, NULL); - FREE(view); -} - - -/** - * Map color and z/stencil framebuffer surfaces. - */ -static void -cell_map_surfaces(struct cell_context *cell) -{ -#if 0 - struct pipe_screen *screen = cell->pipe.screen; -#endif - uint i; - - for (i = 0; i < 1; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->cbuf_map[i] = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->cbuf_map[i] = ct->data; -#endif - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->zsbuf_map = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->zsbuf_map = ct->data; -#endif - } - } -} - - -/** - * Unmap color and z/stencil framebuffer surfaces. 
- */ -static void -cell_unmap_surfaces(struct cell_context *cell) -{ - /*struct pipe_screen *screen = cell->pipe.screen;*/ - uint i; - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps && cell->cbuf_map[i]) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - assert(ps->texture); - /*assert(ct->buffer);*/ - - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->cbuf_map[i] = NULL; - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps && cell->zsbuf_map) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->zsbuf_map = NULL; - } - } -} - - -static void -cell_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct cell_context *cell = cell_context(pipe); - - if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - uint i; - - /* unmap old surfaces */ - cell_unmap_surfaces(cell); - - /* Finish any pending rendering to the current surface before - * installing a new surface! 
- */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - /* update my state - * (this is also where old surfaces will finally get freed) - */ - cell->framebuffer.width = fb->width; - cell->framebuffer.height = fb->height; - cell->framebuffer.nr_cbufs = fb->nr_cbufs; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); - } - pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); - - /* map new surfaces */ - cell_map_surfaces(cell); - - cell->dirty |= CELL_NEW_FRAMEBUFFER; - } -} - - -void -cell_init_state_functions(struct cell_context *cell) -{ - cell->pipe.create_blend_state = cell_create_blend_state; - cell->pipe.bind_blend_state = cell_bind_blend_state; - cell->pipe.delete_blend_state = cell_delete_blend_state; - - cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; - cell->pipe.delete_sampler_state = cell_delete_sampler_state; - - cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views; - cell->pipe.create_sampler_view = cell_create_sampler_view; - cell->pipe.sampler_view_destroy = cell_sampler_view_destroy; - - cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; - cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; - cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; - - cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; - cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; - cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; - - cell->pipe.set_blend_color = cell_set_blend_color; - cell->pipe.set_stencil_ref = cell_set_stencil_ref; - cell->pipe.set_clip_state = cell_set_clip_state; - cell->pipe.set_sample_mask = cell_set_sample_mask; - - cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; - - cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; - 
cell->pipe.set_scissor_state = cell_set_scissor_state; - cell->pipe.set_viewport_state = cell_set_viewport_state; -} diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h deleted file mode 100644 index 1889bd52ff5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_PIPE_STATE_H -#define CELL_PIPE_STATE_H - - -struct cell_context; - -extern void -cell_init_state_functions(struct cell_context *cell); - - -#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h deleted file mode 100644 index 7e2e093565d..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_public.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef CELL_PUBLIC_H -#define CELL_PUBLIC_H - -struct pipe_screen; -struct sw_winsys; - -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c deleted file mode 100644 index f648482c551..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ /dev/null @@ -1,211 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Last stage of 'draw' pipeline: send tris to SPUs. - * \author Brian Paul - */ - -#include "cell_context.h" -#include "cell_render.h" -#include "cell_spu.h" -#include "util/u_memory.h" -#include "draw/draw_private.h" - - -struct render_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct cell_context *cell; -}; - - -static INLINE struct render_stage * -render_stage(struct draw_stage *stage) -{ - return (struct render_stage *) stage; -} - - -static void render_begin( struct draw_stage *stage ) -{ -#if 0 - struct render_stage *render = render_stage(stage); - struct cell_context *sp = render->cell; - const struct pipe_shader_state *fs = &render->cell->fs->shader; - render->quad.nr_attrs = render->cell->nr_frag_attrs; - - render->firstFpInput = fs->input_semantic_name[0]; - - sp->quad.first->begin(sp->quad.first); -#endif -} - - -static void render_end( struct draw_stage *stage ) -{ -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ - struct render_stage *render = render_stage(stage); - /*render->cell->line_stipple_counter = 0;*/ -} - - -static void -render_point(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -static void -render_line(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -/** Write a vertex into the prim buffer */ -static void -save_vertex(struct cell_prim_buffer *buf, uint pos, - const struct vertex_header *vert) -{ - uint attr, j; - - for (attr = 0; attr < 2; attr++) { - for (j = 0; j < 4; j++) { - buf->vertex[pos][attr][j] = vert->data[attr][j]; - } - } - - /* update bounding box */ - if 
(vert->data[0][0] < buf->xmin) - buf->xmin = vert->data[0][0]; - if (vert->data[0][0] > buf->xmax) - buf->xmax = vert->data[0][0]; - if (vert->data[0][1] < buf->ymin) - buf->ymin = vert->data[0][1]; - if (vert->data[0][1] > buf->ymax) - buf->ymax = vert->data[0][1]; -} - - -static void -render_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct render_stage *rs = render_stage(stage); - struct cell_context *cell = rs->cell; - struct cell_prim_buffer *buf = &cell->prim_buffer; - uint i; - - if (buf->num_verts + 3 > CELL_MAX_VERTS) { - cell_flush_prim_buffer(cell); - } - - i = buf->num_verts; - assert(i+2 <= CELL_MAX_VERTS); - save_vertex(buf, i+0, prim->v[0]); - save_vertex(buf, i+1, prim->v[1]); - save_vertex(buf, i+2, prim->v[2]); - buf->num_verts += 3; -} - - -/** - * Send the a RENDER command to all SPUs to have them render the prims - * in the current prim_buffer. - */ -void -cell_flush_prim_buffer(struct cell_context *cell) -{ - uint i; - - if (cell->prim_buffer.num_verts == 0) - return; - - for (i = 0; i < cell->num_spus; i++) { - struct cell_command_render *render = &cell_global.command[i].render; - render->prim_type = PIPE_PRIM_TRIANGLES; - render->num_verts = cell->prim_buffer.num_verts; - render->front_ccw = cell->rasterizer->front_ccw; - render->vertex_size = cell->vertex_info->size * 4; - render->xmin = cell->prim_buffer.xmin; - render->ymin = cell->prim_buffer.ymin; - render->xmax = cell->prim_buffer.xmax; - render->ymax = cell->prim_buffer.ymax; - render->vertex_data = &cell->prim_buffer.vertex; - ASSERT_ALIGN16(render->vertex_data); - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); - } - - cell->prim_buffer.num_verts = 0; - - cell->prim_buffer.xmin = 1e100; - cell->prim_buffer.ymin = 1e100; - cell->prim_buffer.xmax = -1e100; - cell->prim_buffer.ymax = -1e100; - - /* XXX temporary, need to double-buffer the prim buffer until we get - * a real command buffer/list system. 
- */ - cell_flush(&cell->pipe, 0x0); -} - - - -static void render_destroy( struct draw_stage *stage ) -{ - FREE( stage ); -} - - -/** - * Create a new draw/render stage. This will be plugged into the - * draw module as the last pipeline stage. - */ -struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) -{ - struct render_stage *render = CALLOC_STRUCT(render_stage); - - render->cell = cell; - render->stage.draw = cell->draw; - render->stage.begin = render_begin; - render->stage.point = render_point; - render->stage.line = render_line; - render->stage.tri = render_tri; - render->stage.end = render_end; - render->stage.reset_stipple_counter = reset_stipple_counter; - render->stage.destroy = render_destroy; - - /* - render->quad.coef = render->coef; - render->quad.posCoef = &render->posCoef; - */ - - return &render->stage; -} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h deleted file mode 100644 index 826dcbafeba..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_RENDER_H -#define CELL_RENDER_H - -struct cell_context; -struct draw_stage; - -extern void -cell_flush_prim_buffer(struct cell_context *cell); - -extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); - -#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c deleted file mode 100644 index 7ffdcc51bbd..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ /dev/null @@ -1,221 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" - -#include "cell/common.h" -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_texture.h" -#include "cell_public.h" - -#include "state_tracker/sw_winsys.h" - - -static const char * -cell_get_vendor(struct pipe_screen *screen) -{ - return "VMware, Inc."; -} - - -static const char * -cell_get_name(struct pipe_screen *screen) -{ - return "Cell"; -} - - -static int -cell_get_param(struct pipe_screen *screen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return CELL_MAX_SAMPLERS; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 10; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; /* XXX to do */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - 
return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - default: - return 0; - } -} - -static int -cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - switch (param) { - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - return CELL_MAX_SAMPLERS; - default: - return tgsi_exec_get_shader_param(param); - } - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - return draw_get_shader_param(shader, param); - default: - return 0; - } -} - -static float -cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } -} - - -static boolean -cell_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned tex_usage) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - if (sample_count > 1) - return FALSE; - - if (tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) - return FALSE; - } - - /* only a few formats are known to work at this time */ - switch (format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return TRUE; - default: - return FALSE; - } -} - - -static void -cell_destroy_screen( struct pipe_screen *screen ) -{ - struct cell_screen 
*sp_screen = cell_screen(screen); - struct sw_winsys *winsys = sp_screen->winsys; - - if(winsys->destroy) - winsys->destroy(winsys); - - FREE(screen); -} - - - -/** - * Create a new pipe_screen object - * Note: we're not presently subclassing pipe_screen (no cell_screen) but - * that would be the place to put SPU thread/context info... - */ -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys) -{ - struct cell_screen *screen = CALLOC_STRUCT(cell_screen); - - if (!screen) - return NULL; - - screen->winsys = winsys; - screen->base.winsys = NULL; - - screen->base.destroy = cell_destroy_screen; - - screen->base.get_name = cell_get_name; - screen->base.get_vendor = cell_get_vendor; - screen->base.get_param = cell_get_param; - screen->base.get_shader_param = cell_get_shader_param; - screen->base.get_paramf = cell_get_paramf; - screen->base.is_format_supported = cell_is_format_supported; - screen->base.context_create = cell_create_context; - - cell_init_screen_texture_funcs(&screen->base); - - return &screen->base; -} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h deleted file mode 100644 index baff9d3b7d4..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_SCREEN_H -#define CELL_SCREEN_H - - -#include "pipe/p_screen.h" - -struct sw_winsys; - -struct cell_screen { - struct pipe_screen base; - - struct sw_winsys *winsys; - - /* Increments whenever textures are modified. Contexts can track - * this. 
- */ - unsigned timestamp; -}; - -static INLINE struct cell_screen * -cell_screen( struct pipe_screen *pipe ) -{ - return (struct cell_screen *)pipe; -} - - -#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c deleted file mode 100644 index 39284f3a5d1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Utility/wrappers for communicating with the SPUs. 
- */ - - -#include <pthread.h> - -#include "cell_spu.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "util/u_memory.h" -#include "cell/common.h" - - -/* -helpful headers: -/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h -*/ - - -/** - * Cell/SPU info that's not per-context. - */ -struct cell_global_info cell_global; - - -/** - * Scan /proc/cpuinfo to determine the timebase for the system. - * This is used by the SPUs to convert 'decrementer' ticks to seconds. - * There may be a better way to get this value... - */ -static unsigned -get_timebase(void) -{ - FILE *f = fopen("/proc/cpuinfo", "r"); - unsigned timebase; - - assert(f); - while (!feof(f)) { - char line[80]; - fgets(line, sizeof(line), f); - if (strncmp(line, "timebase", 8) == 0) { - char *colon = strchr(line, ':'); - if (colon) { - timebase = atoi(colon + 2); - break; - } - } - } - fclose(f); - - return timebase; -} - - -/** - * Write a 1-word message to the given SPE mailbox. - */ -void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) -{ - spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); -} - - -/** - * Wait for a 1-word message to arrive in given mailbox. - */ -uint -wait_mbox_message(spe_context_ptr_t ctx) -{ - do { - unsigned data; - int count = spe_out_mbox_read(ctx, &data, 1); - - if (count == 1) { - return data; - } - - if (count < 0) { - /* error */ ; - } - } while (1); -} - - -/** - * Called by pthread_create() to spawn an SPU thread. - */ -static void * -cell_thread_function(void *arg) -{ - struct cell_init_info *init = (struct cell_init_info *) arg; - unsigned entry = SPE_DEFAULT_ENTRY; - - ASSERT_ALIGN16(init); - - if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, - init, NULL, NULL) < 0) { - fprintf(stderr, "spe_context_run() failed\n"); - exit(1); - } - - pthread_exit(NULL); -} - - -/** - * Create the SPU threads. This is done once during driver initialization. - * This involves setting the "init" message which is sent to each SPU. 
- * The init message specifies an SPU id, total number of SPUs, location - * and number of batch buffers, etc. - */ -void -cell_start_spus(struct cell_context *cell) -{ - static boolean one_time_init = FALSE; - uint i, j; - uint timebase = get_timebase(); - - if (one_time_init) { - fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " - "on Cell.\n"); - abort(); - } - - one_time_init = TRUE; - - assert(cell->num_spus <= CELL_MAX_SPUS); - - ASSERT_ALIGN16(&cell_global.inits[0]); - ASSERT_ALIGN16(&cell_global.inits[1]); - - /* - * Initialize the global 'inits' structure for each SPU. - * A pointer to the init struct will be passed to each SPU. - * The SPUs will then each grab their init info with mfc_get(). - */ - for (i = 0; i < cell->num_spus; i++) { - cell_global.inits[i].id = i; - cell_global.inits[i].num_spus = cell->num_spus; - cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].inv_timebase = 1000.0f / timebase; - - for (j = 0; j < CELL_NUM_BUFFERS; j++) { - cell_global.inits[i].buffers[j] = cell->buffer[j]; - } - cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; - - cell_global.inits[i].spu_functions = &cell->spu_functions; - - cell_global.spe_contexts[i] = spe_context_create(0, NULL); - if (!cell_global.spe_contexts[i]) { - fprintf(stderr, "spe_context_create() failed\n"); - exit(1); - } - - if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { - fprintf(stderr, "spe_program_load() failed\n"); - exit(1); - } - - pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ - NULL, /* pthread attribs */ - &cell_thread_function, /* start routine */ - &cell_global.inits[i]); /* thread argument */ - } -} - - -/** - * Tell all the SPUs to stop/exit. - * This is done when the driver's exiting / cleaning up. 
- */ -void -cell_spu_exit(struct cell_context *cell) -{ - uint i; - - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); - } - - /* wait for threads to exit */ - for (i = 0; i < cell->num_spus; i++) { - void *value; - pthread_join(cell_global.spe_threads[i], &value); - cell_global.spe_threads[i] = 0; - cell_global.spe_contexts[i] = 0; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h deleted file mode 100644 index c93958a9ed5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_SPU -#define CELL_SPU - - -#include <libspe2.h> -#include <pthread.h> -#include "cell/common.h" - -#include "cell_context.h" - - -/** - * Global vars, for now anyway. - */ -struct cell_global_info -{ - /** - * SPU/SPE handles, etc - */ - spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; - pthread_t spe_threads[CELL_MAX_SPUS]; - - /** - * Data sent to SPUs at start-up - */ - struct cell_init_info inits[CELL_MAX_SPUS]; -}; - - -extern struct cell_global_info cell_global; - - -/** This is the handle for the actual SPE code */ -extern spe_program_handle_t g3d_spu; - - -extern void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); - -extern uint -wait_mbox_message(spe_context_ptr_t ctx); - - -extern void -cell_start_spus(struct cell_context *cell); - - -extern void -cell_spu_exit(struct cell_context *cell); - - -#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h deleted file mode 100644 index 7adedcde57c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ /dev/null @@ -1,65 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_STATE_H -#define CELL_STATE_H - - -#define CELL_NEW_VIEWPORT 0x1 -#define CELL_NEW_RASTERIZER 0x2 -#define CELL_NEW_FS 0x4 -#define CELL_NEW_BLEND 0x8 -#define CELL_NEW_CLIP 0x10 -#define CELL_NEW_SCISSOR 0x20 -#define CELL_NEW_STIPPLE 0x40 -#define CELL_NEW_FRAMEBUFFER 0x80 -#define CELL_NEW_ALPHA_TEST 0x100 -#define CELL_NEW_DEPTH_STENCIL 0x200 -#define CELL_NEW_SAMPLER 0x400 -#define CELL_NEW_TEXTURE 0x800 -#define CELL_NEW_VERTEX 0x1000 -#define CELL_NEW_VS 0x2000 -#define CELL_NEW_VS_CONSTANTS 0x4000 -#define CELL_NEW_FS_CONSTANTS 0x8000 -#define CELL_NEW_VERTEX_INFO 0x10000 - - -extern void -cell_update_derived( struct cell_context *cell ); - - -extern void -cell_init_shader_functions(struct cell_context *cell); - - -extern void -cell_init_vertex_functions(struct cell_context *cell); - - -#endif /* CELL_STATE_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c deleted file mode 100644 index b723e794e71..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ /dev/null @@ -1,170 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_state.h" -#include "cell_state_emit.h" - - -/** - * Determine how to map vertex program outputs to fragment program inputs. - * Basically, this will be used when computing the triangle interpolation - * coefficients from the post-transform vertex attributes. - */ -static void -calculate_vertex_layout( struct cell_context *cell ) -{ - const struct cell_fragment_shader_state *fs = cell->fs; - const enum interp_mode colorInterp - = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; - struct vertex_info *vinfo = &cell->vertex_info; - uint i; - int src; - -#if 0 - if (cell->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; - vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; - } -#endif - - /* reset vinfo */ - vinfo->num_attribs = 0; - - /* we always want to emit vertex pos */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); - - - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. - */ - for (i = 0; i < fs->info.num_inputs; i++) { - switch (fs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - /* already done above */ - break; - - case TGSI_SEMANTIC_COLOR: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); -#if 1 - if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ - src = 0; -#endif - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); - break; - - case TGSI_SEMANTIC_GENERIC: - /* this includes texcoords and varying vars */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - break; - - default: - assert(0); - } - } - - draw_compute_vertex_size(vinfo); - - /* XXX only signal this if format really changes */ - cell->dirty |= CELL_NEW_VERTEX_INFO; -} - - -#if 0 -/** - * Recompute 
cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct cell_context *sp) -{ - uint surfWidth = sp->framebuffer.width; - uint surfHeight = sp->framebuffer.height; - - if (sp->rasterizer->scissor) { - /* clip to scissor rect */ - sp->cliprect.minx = MAX2(sp->scissor.minx, 0); - sp->cliprect.miny = MAX2(sp->scissor.miny, 0); - sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); - sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - sp->cliprect.minx = 0; - sp->cliprect.miny = 0; - sp->cliprect.maxx = surfWidth; - sp->cliprect.maxy = surfHeight; - } -} -#endif - - - -/** - * Update derived state, send current state to SPUs prior to rendering. - */ -void cell_update_derived( struct cell_context *cell ) -{ - if (cell->dirty & (CELL_NEW_RASTERIZER | - CELL_NEW_FS | - CELL_NEW_VS)) - calculate_vertex_layout( cell ); - -#if 0 - if (cell->dirty & (CELL_NEW_SCISSOR | - CELL_NEW_DEPTH_STENCIL_ALPHA | - CELL_NEW_FRAMEBUFFER)) - compute_cliprect(cell); -#endif - - cell_emit_state(cell); - - cell->dirty = 0; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c deleted file mode 100644 index bb11c68fa24..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ /dev/null @@ -1,343 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_format.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" -#include "cell_state.h" -#include "cell_state_emit.h" -#include "cell_batch.h" -#include "cell_texture.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - - -/** - * Find/create a cell_command_fragment_ops object corresponding to the - * current blend/stencil/z/colormask/etc. state. 
- */ -static struct cell_command_fragment_ops * -lookup_fragment_ops(struct cell_context *cell) -{ - struct cell_fragment_ops_key key; - struct cell_command_fragment_ops *ops; - - /* - * Build key - */ - memset(&key, 0, sizeof(key)); - key.blend = *cell->blend; - key.blend_color = cell->blend_color; - key.dsa = *cell->depth_stencil; - - if (cell->framebuffer.cbufs[0]) - key.color_format = cell->framebuffer.cbufs[0]->format; - else - key.color_format = PIPE_FORMAT_NONE; - - if (cell->framebuffer.zsbuf) - key.zs_format = cell->framebuffer.zsbuf->format; - else - key.zs_format = PIPE_FORMAT_NONE; - - /* - * Look up key in cache. - */ - ops = (struct cell_command_fragment_ops *) - util_keymap_lookup(cell->fragment_ops_cache, &key); - - /* - * If not found, create/save new fragment ops command. - */ - if (!ops) { - struct spe_function spe_code_front, spe_code_back; - unsigned int facing_dependent, total_code_size; - - if (0) - debug_printf("**** Create New Fragment Ops\n"); - - /* Prepare the buffer that will hold the generated code. The - * "0" passed in for the size means that the SPE code will - * use a default size. - */ - spe_init_func(&spe_code_front, 0); - spe_init_func(&spe_code_back, 0); - - /* Generate new code. Always generate new code for both front-facing - * and back-facing fragments, even if it's the same code in both - * cases. - */ - cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); - cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - - /* Make sure the code is a multiple of 8 bytes long; this is - * required to ensure that the dual pipe instruction alignment - * is correct. It's also important for the SPU unpacking, - * which assumes 8-byte boundaries. 
- */ - unsigned int front_code_size = spe_code_size(&spe_code_front); - while (front_code_size % 8 != 0) { - spe_lnop(&spe_code_front); - front_code_size = spe_code_size(&spe_code_front); - } - unsigned int back_code_size = spe_code_size(&spe_code_back); - while (back_code_size % 8 != 0) { - spe_lnop(&spe_code_back); - back_code_size = spe_code_size(&spe_code_back); - } - - /* Determine whether the code we generated is facing-dependent, by - * determining whether the generated code is different for the front- - * and back-facing fragments. - */ - if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { - /* Code is identical; only need one copy. */ - facing_dependent = 0; - total_code_size = front_code_size; - } - else { - /* Code is different for front-facing and back-facing fragments. - * Need to send both copies. - */ - facing_dependent = 1; - total_code_size = front_code_size + back_code_size; - } - - /* alloc new fragment ops command. Note that this structure - * has variant length based on the total code size required. - */ - ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); - /* populate the new cell_command_fragment_ops object */ - ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; - ops->total_code_size = total_code_size; - ops->front_code_index = 0; - memcpy(ops->code, spe_code_front.store, front_code_size); - if (facing_dependent) { - /* We have separate front- and back-facing code. Append the - * back-facing code to the buffer. Be careful because the code - * size is in bytes, but the buffer is of unsigned elements. - */ - ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); - memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); - } - else { - /* Use the same code for front- and back-facing fragments */ - ops->back_code_index = ops->front_code_index; - } - - /* Set the fields for the fallback case. 
Note that these fields - * (and the whole fallback case) will eventually go away. - */ - ops->dsa = *cell->depth_stencil; - ops->blend = *cell->blend; - ops->blend_color = cell->blend_color; - - /* insert cell_command_fragment_ops object into keymap/cache */ - util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); - - /* release rtasm buffer */ - spe_release_func(&spe_code_front); - spe_release_func(&spe_code_back); - } - else { - if (0) - debug_printf("**** Re-use Fragment Ops\n"); - } - - return ops; -} - - - -static void -emit_state_cmd(struct cell_context *cell, uint cmd, - const void *state, uint state_size) -{ - uint32_t *dst = (uint32_t *) - cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); - *dst = cmd; - memcpy(dst + 4, state, state_size); -} - - -/** - * For state marked as 'dirty', construct a state-update command block - * and insert it into the current batch buffer. - */ -void -cell_emit_state(struct cell_context *cell) -{ - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { - struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; - struct pipe_surface *zbuf = cell->framebuffer.zsbuf; - STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); - struct cell_command_framebuffer *fb - = cell_batch_alloc16(cell, sizeof(*fb)); - fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; - fb->color_start = cell->cbuf_map[0]; - fb->color_format = cbuf->format; - fb->depth_start = cell->zsbuf_map; - fb->depth_format = zbuf ? 
zbuf->format : PIPE_FORMAT_NONE; - fb->width = cell->framebuffer.width; - fb->height = cell->framebuffer.height; -#if 0 - printf("EMIT color format %s\n", util_format_name(fb->color_format)); - printf("EMIT depth format %s\n", util_format_name(fb->depth_format)); -#endif - } - - if (cell->dirty & (CELL_NEW_RASTERIZER)) { - STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); - struct cell_command_rasterizer *rast = - cell_batch_alloc16(cell, sizeof(*rast)); - rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; - rast->rasterizer = *cell->rasterizer; - } - - if (cell->dirty & (CELL_NEW_FS)) { - /* Send new fragment program to SPUs */ - STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); - struct cell_command_fragment_program *fp - = cell_batch_alloc16(cell, sizeof(*fp)); - fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; - fp->num_inst = cell->fs->code.num_inst; - memcpy(&fp->code, cell->fs->code.store, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - if (0) { - int i; - printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); - for (i = 0; i < fp->num_inst; i++) { - printf(" %3d: 0x%08x\n", i, fp->code[i]); - } - } - } - - if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { - const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader]->width0 / sizeof(float); - uint i, j; - float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); - uint32_t *ibuf = (uint32_t *) buf; - const float *constants = cell->mapped_constants[shader]; - ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; - ibuf[4] = num_const; - j = 8; - for (i = 0; i < num_const; i++) { - buf[j++] = constants[i]; - } - } - - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | - CELL_NEW_DEPTH_STENCIL | - CELL_NEW_BLEND)) { - struct cell_command_fragment_ops *fops, *fops_cmd; - /* Note that cell_command_fragment_ops is a variant-sized record */ - fops = lookup_fragment_ops(cell); - fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + 
fops->total_code_size)); - memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); - } - - if (cell->dirty & CELL_NEW_SAMPLER) { - uint i; - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_samplers & (1 << i)) { - if (cell->sampler[i]) { - STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); - struct cell_command_sampler *sampler - = cell_batch_alloc16(cell, sizeof(*sampler)); - sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; - sampler->unit = i; - sampler->state = *cell->sampler[i]; - } - } - } - cell->dirty_samplers = 0x0; - } - - if (cell->dirty & CELL_NEW_TEXTURE) { - uint i; - for (i = 0;i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_textures & (1 << i)) { - STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); - struct cell_command_texture *texture = - (struct cell_command_texture *) - cell_batch_alloc16(cell, sizeof(*texture)); - - texture->opcode[0] = CELL_CMD_STATE_TEXTURE; - texture->unit = i; - if (cell->texture[i]) { - struct cell_resource *ct = cell->texture[i]; - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = (ct->mapped + - ct->level_offset[level]); - texture->width[level] = u_minify(ct->base.width0, level); - texture->height[level] = u_minify(ct->base.height0, level); - texture->depth[level] = u_minify(ct->base.depth0, level); - } - texture->target = ct->base.target; - } - else { - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = NULL; - texture->width[level] = 0; - texture->height[level] = 0; - texture->depth[level] = 0; - } - texture->target = 0; - } - } - } - cell->dirty_textures = 0x0; - } - - if (cell->dirty & CELL_NEW_VERTEX_INFO) { - emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, - &cell->vertex_info, sizeof(struct vertex_info)); - } - -#if 0 - if (cell->dirty & CELL_NEW_VS) { - const struct draw_context *const draw = cell->draw; - struct cell_shader_info info; - - info.num_outputs = 
draw_num_shader_outputs(draw); - info.declarations = (uintptr_t) draw->vs.machine.Declarations; - info.num_declarations = draw->vs.machine.NumDeclarations; - info.instructions = (uintptr_t) draw->vs.machine.Instructions; - info.num_instructions = draw->vs.machine.NumInstructions; - info.immediates = (uintptr_t) draw->vs.machine.Imms; - info.num_immediates = draw->vs.machine.ImmLimit / 4; - - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h deleted file mode 100644 index 59f8affe8d3..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_STATE_EMIT_H -#define CELL_STATE_EMIT_H - - -extern void -cell_emit_state(struct cell_context *cell); - - -#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c deleted file mode 100644 index dc33e7ccc2c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ /dev/null @@ -1,1432 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Generate code to perform all per-fragment operations. - * - * Code generated by these functions perform both alpha, depth, and stencil - * testing as well as alpha blending. - * - * \note - * Occlusion query is not supported, but this is the right place to add that - * support. - * - * \author Ian Romanick <[email protected]> - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "cell_context.h" - -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Generate code to perform alpha testing. - * - * The code generated by this function uses the register specificed by - * \c mask as both an input and an output. - * - * \param dsa Current alpha-test state - * \param f Function to which code should be appended - * \param mask Index of register containing active fragment mask - * \param alphas Index of register containing per-fragment alpha values - * - * \note Emits a maximum of 6 instructions. - */ -static void -emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int alphas) -{ - /* If the alpha function is either NEVER or ALWAYS, there is no need to - * load the reference value into a register. ALWAYS is a fairly common - * case, and this optimization saves 2 instructions. 
- */ - if (dsa->alpha.enabled - && (dsa->alpha.func != PIPE_FUNC_NEVER) - && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - int ref = spe_allocate_available_register(f); - int tmp_a = spe_allocate_available_register(f); - int tmp_b = spe_allocate_available_register(f); - union { - float f; - unsigned u; - } ref_val; - boolean complement = FALSE; - - ref_val.f = dsa->alpha.ref; - - spe_il(f, ref, ref_val.u & 0x0000ffff); - spe_ilh(f, ref, ref_val.u >> 16); - - switch (dsa->alpha.func) { - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_EQUAL: - spe_fceq(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GREATER: - spe_fcgt(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GEQUAL: - spe_fcgt(f, tmp_a, ref, alphas); - spe_fceq(f, tmp_b, ref, alphas); - spe_or(f, tmp_a, tmp_b, tmp_a); - break; - - case PIPE_FUNC_ALWAYS: - case PIPE_FUNC_NEVER: - default: - assert(0); - break; - } - - if (complement) { - spe_andc(f, mask, mask, tmp_a); - } else { - spe_and(f, mask, mask, tmp_a); - } - - spe_release_register(f, ref); - spe_release_register(f, tmp_a); - spe_release_register(f, tmp_b); - } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { - spe_il(f, mask, 0); - } -} - - -/** - * Generate code to perform Z testing. Four Z values are tested at once. - * \param dsa Current depth-test state - * \param f Function to which code should be appended - * \param mask Index of register to contain depth-pass mask - * \param stored Index of register containing values from depth buffer - * \param calculated Index of register containing per-fragment depth values - * - * \return - * If the calculated depth comparison mask is the actual mask, \c FALSE is - * returned. If the calculated depth comparison mask is the compliment of - * the actual mask, \c TRUE is returned. 
- * - * \note Emits a maximum of 3 instructions. - */ -static boolean -emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int stored, int calculated) -{ - unsigned func = (dsa->depth.enabled) - ? dsa->depth.func : PIPE_FUNC_ALWAYS; - int tmp = spe_allocate_available_register(f); - boolean compliment = FALSE; - - switch (func) { - case PIPE_FUNC_NEVER: - spe_il(f, mask, 0); - break; - - case PIPE_FUNC_NOTEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - spe_ceq(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - spe_clgt(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LESS: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - spe_clgt(f, mask, calculated, stored); - spe_ceq(f, tmp, calculated, stored); - spe_or(f, mask, mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - spe_il(f, mask, ~0); - break; - - default: - assert(0); - break; - } - - spe_release_register(f, tmp); - return compliment; -} - - -/** - * Generate code to apply the stencil operation (after testing). - * \note Emits a maximum of 5 instructions. - * - * \warning - * Since \c out and \c in might be the same register, this routine cannot - * generate code that uses \c out as a temporary. 
- */ -static void -emit_stencil_op(struct spe_function *f, - int out, int in, int mask, unsigned op, unsigned ref) -{ - const int clamp = spe_allocate_available_register(f); - const int clamp_mask = spe_allocate_available_register(f); - const int result = spe_allocate_available_register(f); - - switch(op) { - case PIPE_STENCIL_OP_KEEP: - assert(0); - case PIPE_STENCIL_OP_ZERO: - spe_il(f, result, 0); - break; - case PIPE_STENCIL_OP_REPLACE: - spe_il(f, result, ref); - break; - case PIPE_STENCIL_OP_INCR: - /* clamp = [0xff, 0xff, 0xff, 0xff] */ - spe_il(f, clamp, 0x0ff); - /* result[i] = in[i] + 1 */ - spe_ai(f, result, in, 1); - /* clamp_mask[i] = (result[i] > 0xff) */ - spe_clgti(f, clamp_mask, result, 0x0ff); - /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_DECR: - spe_il(f, clamp, 0); - spe_ai(f, result, in, -1); - - /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned - * arithmetic. - */ - spe_clgti(f, clamp_mask, result, 0x0ff); - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_INCR_WRAP: - spe_ai(f, result, in, 1); - break; - case PIPE_STENCIL_OP_DECR_WRAP: - spe_ai(f, result, in, -1); - break; - case PIPE_STENCIL_OP_INVERT: - spe_nor(f, result, in, in); - break; - default: - assert(0); - } - - spe_selb(f, out, in, result, mask); - - spe_release_register(f, result); - spe_release_register(f, clamp_mask); - spe_release_register(f, clamp); -} - - -/** - * Generate code to do stencil test. Four pixels are tested at once. - * \param dsa Depth / stencil test state - * \param face 0 for front face, 1 for back face - * \param f Function to append instructions to - * \param mask Register containing mask of fragments passing the - * alpha test - * \param depth_mask Register containing mask of fragments passing the - * depth test - * \param depth_compliment Is \c depth_mask the compliment of the actual mask? 
- * \param stencil Register containing values from stencil buffer - * \param depth_pass Register to store mask of fragments passing stencil test - * and depth test - * - * \note - * Emits a maximum of 10 + (3 * 5) = 25 instructions. - */ -static int -emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, - struct pipe_stencil_ref *sr, - unsigned face, - struct spe_function *f, - int mask, - int depth_mask, - boolean depth_complement, - int stencil, - int depth_pass) -{ - int stencil_fail = spe_allocate_available_register(f); - int depth_fail = spe_allocate_available_register(f); - int stencil_mask = spe_allocate_available_register(f); - int stencil_pass = spe_allocate_available_register(f); - int face_stencil = spe_allocate_available_register(f); - int stencil_src = stencil; - const unsigned ref = (sr->ref_value[face] - & dsa->stencil[face].valuemask); - boolean complement = FALSE; - int stored; - int tmp = spe_allocate_available_register(f); - - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].valuemask != 0x0ff)) { - stored = spe_allocate_available_register(f); - spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); - } else { - stored = stencil; - } - - - switch (dsa->stencil[face].func) { - case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ - break; - - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - /* stencil_mask[i] = (stored[i] == ref) */ - spe_ceqi(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - complement = TRUE; - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - /* tmp[i] = 
(stored[i] == ref) */ - spe_ceqi(f, tmp, stored, ref); - /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ - spe_or(f, stencil_mask, stencil_mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - /* See comment below. */ - break; - - default: - assert(0); - break; - } - - if (stored != stencil) { - spe_release_register(f, stored); - } - spe_release_register(f, tmp); - - - /* ALWAYS is a very common stencil-test, so some effort is applied to - * optimize that case. The stencil-pass mask is the same as the input - * fragment mask. This makes the stencil-test (above) a no-op, and the - * input fragment mask can be "renamed" the stencil-pass mask. - */ - if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { - spe_release_register(f, stencil_pass); - stencil_pass = mask; - } else { - if (complement) { - spe_andc(f, stencil_pass, mask, stencil_mask); - } else { - spe_and(f, stencil_pass, mask, stencil_mask); - } - } - - if (depth_complement) { - spe_andc(f, depth_pass, stencil_pass, depth_mask); - } else { - spe_and(f, depth_pass, stencil_pass, depth_mask); - } - - - /* Conditionally emit code to update the stencil value under various - * condititons. Note that there is no need to generate code under the - * following circumstances: - * - * - Stencil write mask is zero. 
- * - For stencil-fail if the stencil test is ALWAYS - * - For depth-fail if the stencil test is NEVER - * - For depth-pass if the stencil test is NEVER - * - Any of the 3 conditions if the operation is KEEP - */ - if (dsa->stencil[face].writemask != 0) { - if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { - if (complement) { - spe_and(f, stencil_fail, mask, stencil_mask); - } else { - spe_andc(f, stencil_fail, mask, stencil_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, - dsa->stencil[face].fail_op, - sr->ref_value[face]); - - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { - if (depth_complement) { - spe_and(f, depth_fail, stencil_pass, depth_mask); - } else { - spe_andc(f, depth_fail, stencil_pass, depth_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, depth_fail, - dsa->stencil[face].zfail_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { - emit_stencil_op(f, face_stencil, stencil_src, depth_pass, - dsa->stencil[face].zpass_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - } - - spe_release_register(f, stencil_fail); - spe_release_register(f, depth_fail); - spe_release_register(f, stencil_mask); - if (stencil_pass != mask) { - spe_release_register(f, stencil_pass); - } - - /* If all of the stencil operations were KEEP or the stencil write mask was - * zero, "stencil_src" will still be set to "stencil". In this case - * release the "face_stencil" register. Otherwise apply the stencil write - * mask to select bits from the calculated stencil value and the previous - * stencil value. 
- */ - if (stencil_src == stencil) { - spe_release_register(f, face_stencil); - } else if (dsa->stencil[face].writemask != 0x0ff) { - int tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, dsa->stencil[face].writemask); - spe_selb(f, stencil_src, stencil, stencil_src, tmp); - - spe_release_register(f, tmp); - } - - return stencil_src; -} - - -void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa, - struct pipe_stencil_ref *sr) -{ - struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; - struct spe_function *const f = &cdsa->code; - - /* This code generates a maximum of 6 (alpha test) + 3 (depth test) - * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round - * up to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Allocate registers for the function's input parameters. Cleverly (and - * clever code is usually dangerous, but I couldn't resist) the generated - * function returns a structure. Returned structures start with register - * 3, and the structure fields are ordered to match up exactly with the - * input parameters. 
- */ - int mask = spe_allocate_register(f, 3); - int depth = spe_allocate_register(f, 4); - int stencil = spe_allocate_register(f, 5); - int zvals = spe_allocate_register(f, 6); - int frag_a = spe_allocate_register(f, 7); - int facing = spe_allocate_register(f, 8); - - int depth_mask = spe_allocate_available_register(f); - - boolean depth_complement; - - - emit_alpha_test(dsa, f, mask, frag_a); - - depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); - - if (dsa->stencil[0].enabled) { - const int front_depth_pass = spe_allocate_available_register(f); - int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask, - depth_mask, depth_complement, - stencil, front_depth_pass); - - if (dsa->stencil[1].enabled) { - const int back_depth_pass = spe_allocate_available_register(f); - int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask, - depth_mask, depth_complement, - stencil, back_depth_pass); - - /* If the front facing stencil value and the back facing stencil - * value are stored in the same register, there is no need to select - * a value based on the facing. This can happen if the stencil value - * was not modified due to the write masks being zero, the stencil - * operations being KEEP, etc. 
- */ - if (front_stencil != back_stencil) { - spe_selb(f, stencil, back_stencil, front_stencil, facing); - } - - if (back_stencil != stencil) { - spe_release_register(f, back_stencil); - } - - if (front_stencil != stencil) { - spe_release_register(f, front_stencil); - } - - spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); - - spe_release_register(f, back_depth_pass); - } else { - if (front_stencil != stencil) { - spe_or(f, stencil, front_stencil, front_stencil); - spe_release_register(f, front_stencil); - } - spe_or(f, mask, front_depth_pass, front_depth_pass); - } - - spe_release_register(f, front_depth_pass); - } else if (dsa->depth.enabled) { - if (depth_complement) { - spe_andc(f, mask, mask, depth_mask); - } else { - spe_and(f, mask, mask, depth_mask); - } - } - - if (dsa->depth.writemask) { - spe_selb(f, depth, depth, zvals, mask); - } - - spe_bi(f, 0, 0, 0); /* return from function call */ - - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# alpha (%sabled)\n", - (dsa->alpha.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->alpha.func); - printf("# ref: %.2f\n", dsa->alpha.ref); - - printf("# depth (%sabled)\n", - (dsa->depth.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->depth.func); - - for (i = 0; i < 2; i++) { - printf("# %s stencil (%sabled)\n", - (i == 0) ? "front" : "back", - (dsa->stencil[i].enabled) ? 
"en" : "dis"); - - printf("# func: %u\n", dsa->stencil[i].func); - printf("# op (sf, zf, zp): %u %u %u\n", - dsa->stencil[i].fail_op, - dsa->stencil[i].zfail_op, - dsa->stencil[i].zpass_op); - printf("# ref value / value mask / write mask: %02x %02x %02x\n", - sr->ref_value[i], - dsa->stencil[i].valuemask, - dsa->stencil[i].writemask); - } - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -/** - * \note Emits a maximum of 3 instructions - */ -static int -emit_alpha_factor_calculation(struct spe_function *f, - unsigned factor, - int src_alpha, int dst_alpha, int const_alpha) -{ - int factor_reg; - int tmp; - - - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor_reg = spe_allocate_available_register(f); - - spe_or(f, factor_reg, src_alpha, src_alpha); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor_reg = dst_alpha; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor_reg = spe_allocate_available_register(f); - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, const_alpha); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor_reg = const_alpha; - break; - - case PIPE_BLENDFACTOR_ZERO: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, src_alpha); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, dst_alpha); - - 
spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - factor_reg = -1; - break; - } - - return factor_reg; -} - - -/** - * \note Emits a maximum of 6 instructions - */ -static void -emit_color_factor_calculation(struct spe_function *f, - unsigned sF, unsigned mask, - const int *src, - const int *dst, - const int *const_color, - int *factor) -{ - int tmp; - unsigned i; - - - factor[0] = -1; - factor[1] = -1; - factor[2] = -1; - factor[3] = -1; - - switch (sF) { - case PIPE_BLENDFACTOR_ONE: - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_or(f, factor[i], src[i], src[i]); - } - } - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_or(f, factor[0], src[3], src[3]); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor[0] = dst[3]; - factor[1] = dst[3]; - factor[2] = dst[3]; - break; - - case PIPE_BLENDFACTOR_DST_COLOR: - factor[0] = dst[0]; - factor[1] = dst[1]; - factor[2] = dst[2]; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - /* Alpha saturate means min(As, 1-Ad). 
- */ - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, tmp, tmp, dst[3]); - spe_fcgt(f, factor[0], tmp, src[3]); - spe_selb(f, factor[0], src[3], tmp, factor[0]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; i++) { - factor[i] = spe_allocate_available_register(f); - - spe_fs(f, factor[i], tmp, const_color[i]); - } - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_COLOR: - for (i = 0; i < 3; i++) { - factor[i] = const_color[i]; - } - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, const_color[3]); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor[0] = const_color[3]; - factor[1] = factor[0]; - factor[2] = factor[0]; - break; - - case PIPE_BLENDFACTOR_ZERO: - break; - - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, src[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, src[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - 
spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, dst[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, dst[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - } -} - - -static void -emit_blend_calculation(struct spe_function *f, - unsigned func, unsigned sF, unsigned dF, - int src, int src_factor, int dst, int dst_factor) -{ - int tmp = spe_allocate_available_register(f); - - switch (func) { - case PIPE_BLEND_ADD: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. */ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fa(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fma(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. 
*/ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fms(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_REVERSE_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, src); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, dst, src); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, src, src_factor); - spe_fms(f, src, src, dst_factor, tmp); - } - break; - - case PIPE_BLEND_MIN: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, src, dst, tmp); - break; - - case PIPE_BLEND_MAX: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, dst, src, tmp); - break; - - default: - assert(0); - } - - spe_release_register(f, tmp); -} - - -/** - * Generate code to perform alpha blending on the SPE - */ -void -cell_generate_alpha_blend(struct cell_blend_state *cb) -{ - struct pipe_blend_state *const b = &cb->base; - struct spe_function *const f = &cb->code; - - /* This code generates a maximum of 3 (source alpha factor) - * + 3 (destination alpha factor) + (3 * 6) (source color factor) - * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) - * + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to - * make it a happy power-of-two.
- */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - const int frag[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - const int pixel[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - const int const_color[4] = { - spe_allocate_register(f, 11), - spe_allocate_register(f, 12), - spe_allocate_register(f, 13), - spe_allocate_register(f, 14), - }; - unsigned func[4]; - unsigned sF[4]; - unsigned dF[4]; - unsigned i; - int src_factor[4]; - int dst_factor[4]; - - - /* Does the selected blend mode make use of the source / destination - * color (RGB) blend factors? - */ - boolean need_color_factor = b->rt[0].blend_enable - && (b->rt[0].rgb_func != PIPE_BLEND_MIN) - && (b->rt[0].rgb_func != PIPE_BLEND_MAX); - - /* Does the selected blend mode make use of the source / destination - * alpha blend factors? - */ - boolean need_alpha_factor = b->rt[0].blend_enable - && (b->rt[0].alpha_func != PIPE_BLEND_MIN) - && (b->rt[0].alpha_func != PIPE_BLEND_MAX); - - - if (b->rt[0].blend_enable) { - sF[0] = b->rt[0].rgb_src_factor; - sF[1] = sF[0]; - sF[2] = sF[0]; - switch (b->rt[0].alpha_src_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - sF[3] = PIPE_BLENDFACTOR_ONE; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - sF[3] = b->rt[0].alpha_src_factor + 1; - break; - default: - sF[3] = b->rt[0].alpha_src_factor; - } - - dF[0] = b->rt[0].rgb_dst_factor; - dF[1] = dF[0]; - dF[2] = dF[0]; - switch (b->rt[0].alpha_dst_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - dF[3] = b->rt[0].alpha_dst_factor + 1; - break; - default: - dF[3] = b->rt[0].alpha_dst_factor; - } - - func[0] = 
b->rt[0].rgb_func; - func[1] = func[0]; - func[2] = func[0]; - func[3] = b->rt[0].alpha_func; - } else { - sF[0] = PIPE_BLENDFACTOR_ONE; - sF[1] = PIPE_BLENDFACTOR_ONE; - sF[2] = PIPE_BLENDFACTOR_ONE; - sF[3] = PIPE_BLENDFACTOR_ONE; - dF[0] = PIPE_BLENDFACTOR_ZERO; - dF[1] = PIPE_BLENDFACTOR_ZERO; - dF[2] = PIPE_BLENDFACTOR_ZERO; - dF[3] = PIPE_BLENDFACTOR_ZERO; - - func[0] = PIPE_BLEND_ADD; - func[1] = PIPE_BLEND_ADD; - func[2] = PIPE_BLEND_ADD; - func[3] = PIPE_BLEND_ADD; - } - - - /* If alpha writing is enabled and the alpha blend mode requires use of - * the alpha factor, calculate the alpha factor. - */ - if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { - src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], - frag[3], pixel[3]); - - /* If the alpha destination blend factor is the same as the alpha source - * blend factor, re-use the previously calculated value. - */ - dst_factor[3] = (dF[3] == sF[3]) - ? src_factor[3] - : emit_alpha_factor_calculation(f, dF[3], const_color[3], - frag[3], pixel[3]); - } - - - if (sF[0] == sF[3]) { - src_factor[0] = src_factor[3]; - src_factor[1] = src_factor[3]; - src_factor[2] = src_factor[3]; - } else if (sF[0] == dF[3]) { - src_factor[0] = dst_factor[3]; - src_factor[1] = dst_factor[3]; - src_factor[2] = dst_factor[3]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_src_factor, - b->rt[0].colormask, - frag, pixel, const_color, src_factor); - } - - - if (dF[0] == sF[3]) { - dst_factor[0] = src_factor[3]; - dst_factor[1] = src_factor[3]; - dst_factor[2] = src_factor[3]; - } else if (dF[0] == dF[3]) { - dst_factor[0] = dst_factor[3]; - dst_factor[1] = dst_factor[3]; - dst_factor[2] = dst_factor[3]; - } else if (dF[0] == sF[0]) { - dst_factor[0] = src_factor[0]; - dst_factor[1] = src_factor[1]; - dst_factor[2] = src_factor[2]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_dst_factor, - b->rt[0].colormask, - frag, pixel, 
const_color, dst_factor); - } - - - - for (i = 0; i < 4; ++i) { - if ((b->rt[0].colormask & (1U << i)) != 0) { - emit_blend_calculation(f, - func[i], sF[i], dF[i], - frag[i], src_factor[i], - pixel[i], dst_factor[i]); - } - } - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - - printf("# %u instructions\n", f->csr - f->store); - printf("# blend (%sabled)\n", - (cb->base.blend_enable) ? "en" : "dis"); - printf("# RGB func / sf / df: %u %u %u\n", - cb->base.rgb_func, - cb->base.rgb_src_factor, - cb->base.rgb_dst_factor); - printf("# ALP func / sf / df: %u %u %u\n", - cb->base.alpha_func, - cb->base.alpha_src_factor, - cb->base.alpha_dst_factor); - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -static int -PC_OFFSET(const struct spe_function *f, const void *d) -{ - const intptr_t pc = (intptr_t) &f->store[f->num_inst]; - const intptr_t ea = ~0x0f & (intptr_t) d; - - return (ea - pc) >> 2; -} - - -/** - * Generate code to perform color conversion and logic op - * - * \bug - * The code generated by this function should also perform dithering. - * - * \bug - * The code generated by this function should also perform color-write - * masking. - * - * \bug - * Only two framebuffer formats are supported at this time. - */ -void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf) -{ - const unsigned logic_op = (blend->logicop_enable) - ? blend->logicop_func : PIPE_LOGICOP_COPY; - - /* This code generates a maximum of 37 instructions. An additional 32 - * bytes (equiv. to 8 instructions) are needed for data storage. Round up - * to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Pixel colors in framebuffer format in AoS layout. 
- */ - const int pixel[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - - /* Fragment colors stored as floats in SoA layout. - */ - const int frag[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - - const int mask = spe_allocate_register(f, 11); - - - /* Short-circuit the noop and invert cases. - */ - if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { - spe_bi(f, 0, 0, 0); - return; - } else if (logic_op == PIPE_LOGICOP_INVERT) { - spe_nor(f, pixel[0], pixel[0], pixel[0]); - spe_nor(f, pixel[1], pixel[1], pixel[1]); - spe_nor(f, pixel[2], pixel[2], pixel[2]); - spe_nor(f, pixel[3], pixel[3], pixel[3]); - spe_bi(f, 0, 0, 0); - return; - } - - - const int tmp[4] = { - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - }; - - const int shuf_xpose_hi = spe_allocate_available_register(f); - const int shuf_xpose_lo = spe_allocate_available_register(f); - const int shuf_color = spe_allocate_available_register(f); - - - /* Pointer to the beginning of the function's private data area. - */ - uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); - - - /* Convert fragment colors to framebuffer format in AoS layout.
- */ - switch (surf->format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - data[0] = 0x00010203; - data[1] = 0x10111213; - data[2] = 0x04050607; - data[3] = 0x14151617; - data[4] = 0x0c000408; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - data[0] = 0x03020100; - data[1] = 0x13121110; - data[2] = 0x07060504; - data[3] = 0x17161514; - data[4] = 0x0804000c; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - default: - fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); - ASSERT(0); - } - - spe_ilh(f, tmp[0], 0x0808); - spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); - spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); - spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); - - spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); - spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); - spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); - spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); - - spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); - spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); - spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); - spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); - - spe_cfltu(f, frag[0], frag[0], 32); - spe_cfltu(f, frag[1], frag[1], 32); - spe_cfltu(f, frag[2], frag[2], 32); - spe_cfltu(f, frag[3], frag[3], 32); - - spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); - spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); - spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); - spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); - - - /* If logic op is enabled, perform the requested logical operation on the - * converted fragment colors and the pixel colors. 
- */ - switch (logic_op) { - case PIPE_LOGICOP_CLEAR: - spe_il(f, frag[0], 0); - spe_il(f, frag[1], 0); - spe_il(f, frag[2], 0); - spe_il(f, frag[3], 0); - break; - case PIPE_LOGICOP_NOR: - spe_nor(f, frag[0], frag[0], pixel[0]); - spe_nor(f, frag[1], frag[1], pixel[1]); - spe_nor(f, frag[2], frag[2], pixel[2]); - spe_nor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND_INVERTED: - spe_andc(f, frag[0], pixel[0], frag[0]); - spe_andc(f, frag[1], pixel[1], frag[1]); - spe_andc(f, frag[2], pixel[2], frag[2]); - spe_andc(f, frag[3], pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY_INVERTED: - spe_nor(f, frag[0], frag[0], frag[0]); - spe_nor(f, frag[1], frag[1], frag[1]); - spe_nor(f, frag[2], frag[2], frag[2]); - spe_nor(f, frag[3], frag[3], frag[3]); - break; - case PIPE_LOGICOP_AND_REVERSE: - spe_andc(f, frag[0], frag[0], pixel[0]); - spe_andc(f, frag[1], frag[1], pixel[1]); - spe_andc(f, frag[2], frag[2], pixel[2]); - spe_andc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_XOR: - spe_xor(f, frag[0], frag[0], pixel[0]); - spe_xor(f, frag[1], frag[1], pixel[1]); - spe_xor(f, frag[2], frag[2], pixel[2]); - spe_xor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_NAND: - spe_nand(f, frag[0], frag[0], pixel[0]); - spe_nand(f, frag[1], frag[1], pixel[1]); - spe_nand(f, frag[2], frag[2], pixel[2]); - spe_nand(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND: - spe_and(f, frag[0], frag[0], pixel[0]); - spe_and(f, frag[1], frag[1], pixel[1]); - spe_and(f, frag[2], frag[2], pixel[2]); - spe_and(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_EQUIV: - spe_eqv(f, frag[0], frag[0], pixel[0]); - spe_eqv(f, frag[1], frag[1], pixel[1]); - spe_eqv(f, frag[2], frag[2], pixel[2]); - spe_eqv(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR_INVERTED: - spe_orc(f, frag[0], pixel[0], frag[0]); - spe_orc(f, frag[1], pixel[1], frag[1]); - spe_orc(f, frag[2], pixel[2], frag[2]); - spe_orc(f, frag[3], 
pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY: - break; - case PIPE_LOGICOP_OR_REVERSE: - spe_orc(f, frag[0], frag[0], pixel[0]); - spe_orc(f, frag[1], frag[1], pixel[1]); - spe_orc(f, frag[2], frag[2], pixel[2]); - spe_orc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR: - spe_or(f, frag[0], frag[0], pixel[0]); - spe_or(f, frag[1], frag[1], pixel[1]); - spe_or(f, frag[2], frag[2], pixel[2]); - spe_or(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_SET: - spe_il(f, frag[0], ~0); - spe_il(f, frag[1], ~0); - spe_il(f, frag[2], ~0); - spe_il(f, frag[3], ~0); - break; - - /* These two cases are short-circuited above. - */ - case PIPE_LOGICOP_INVERT: - case PIPE_LOGICOP_NOOP: - default: - assert(0); - } - - - /* Apply fragment mask. - */ - spe_ilh(f, tmp[0], 0x0000); - spe_ilh(f, tmp[1], 0x0404); - spe_ilh(f, tmp[2], 0x0808); - spe_ilh(f, tmp[3], 0x0c0c); - - spe_shufb(f, tmp[0], mask, mask, tmp[0]); - spe_shufb(f, tmp[1], mask, mask, tmp[1]); - spe_shufb(f, tmp[2], mask, mask, tmp[2]); - spe_shufb(f, tmp[3], mask, mask, tmp[3]); - - spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); - spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); - spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); - spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# %u instructions\n", f->csr - f->store); - - printf("\t.text\n"); - for (i = 0; i < 64; i++) { - printf("\t.long\t0x%04x\n", p[i]); - } - fflush(stdout); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h deleted file mode 100644 index a8267a51331..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef CELL_STATE_PER_FRAGMENT_H -#define CELL_STATE_PER_FRAGMENT_H - -extern void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); - -extern void -cell_generate_alpha_blend(struct cell_blend_state *cb); - -extern void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf); - -#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c deleted file mode 100644 index ddf14772689..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ /dev/null @@ -1,229 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "tgsi/tgsi_parse.h" - -#include "cell_context.h" -#include "cell_state.h" -#include "cell_gen_fp.h" -#include "cell_texture.h" - - -/** cast wrapper */ -static INLINE struct cell_fragment_shader_state * -cell_fragment_shader_state(void *shader) -{ - return (struct cell_fragment_shader_state *) shader; -} - - -/** cast wrapper */ -static INLINE struct cell_vertex_shader_state * -cell_vertex_shader_state(void *shader) -{ - return (struct cell_vertex_shader_state *) shader; -} - - -/** - * Create fragment shader state. 
- * Called via pipe->create_fs_state() - */ -static void * -cell_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_fragment_shader_state *cfs; - - cfs = CALLOC_STRUCT(cell_fragment_shader_state); - if (!cfs) - return NULL; - - cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cfs->shader.tokens) { - FREE(cfs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cfs->info); - - cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); - - return cfs; -} - - -/** - * Called via pipe->bind_fs_state() - */ -static void -cell_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->fs = cell_fragment_shader_state(fs); - - cell->dirty |= CELL_NEW_FS; -} - - -/** - * Called via pipe->delete_fs_state() - */ -static void -cell_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); - - spe_release_func(&cfs->code); - - FREE((void *) cfs->shader.tokens); - FREE(cfs); -} - - -/** - * Create vertex shader state. 
- * Called via pipe->create_vs_state() - */ -static void * -cell_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs; - - cvs = CALLOC_STRUCT(cell_vertex_shader_state); - if (!cvs) - return NULL; - - cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cvs->shader.tokens) { - FREE(cvs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cvs->info); - - cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); - if (cvs->draw_data == NULL) { - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); - return NULL; - } - - return cvs; -} - - -/** - * Called via pipe->bind_vs_state() - */ -static void -cell_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->vs = cell_vertex_shader_state(vs); - - draw_bind_vertex_shader(cell->draw, - (cell->vs ? cell->vs->draw_data : NULL)); - - cell->dirty |= CELL_NEW_VS; -} - - -/** - * Called via pipe->delete_vs_state() - */ -static void -cell_delete_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); - - draw_delete_vertex_shader(cell->draw, cvs->draw_data); - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); -} - - -/** - * Called via pipe->set_constant_buffer() - */ -static void -cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - struct pipe_resource *constants) -{ - struct cell_context *cell = cell_context(pipe); - unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? 
cell_resource(constants)->data : NULL; - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - if (cell->constants[shader] == constants) - return; - - draw_flush(cell->draw); - - /* note: reference counting */ - pipe_resource_reference(&cell->constants[shader], constants); - - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0, - data, size); - } - - cell->mapped_constants[shader] = data; - - if (shader == PIPE_SHADER_VERTEX) - cell->dirty |= CELL_NEW_VS_CONSTANTS; - else if (shader == PIPE_SHADER_FRAGMENT) - cell->dirty |= CELL_NEW_FS_CONSTANTS; -} - - -void -cell_init_shader_functions(struct cell_context *cell) -{ - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_constant_buffer = cell_set_constant_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c deleted file mode 100644 index 7f65b82619e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ /dev/null @@ -1,120 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: Keith Whitwell <[email protected]> - */ - - -#include "cell_context.h" -#include "cell_state.h" - -#include "util/u_memory.h" -#include "util/u_transfer.h" -#include "draw/draw_context.h" - - -static void * -cell_create_vertex_elements_state(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) -{ - struct cell_velems_state *velems; - assert(count <= PIPE_MAX_ATTRIBS); - velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state)); - if (velems) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(*attribs) * count); - } - return velems; -} - -static void -cell_bind_vertex_elements_state(struct pipe_context *pipe, - void *velems) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems; - - cell->velems = cell_velems; - - cell->dirty |= CELL_NEW_VERTEX; - - if (cell_velems) - draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); -} - -static void -cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) -{ - FREE( velems ); -} - - -static void -cell_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct cell_context *cell = cell_context(pipe); - - assert(count <= PIPE_MAX_ATTRIBS); - - util_copy_vertex_buffers(cell->vertex_buffer, - &cell->num_vertex_buffers, - buffers, count); - - cell->dirty |= CELL_NEW_VERTEX; - - draw_set_vertex_buffers(cell->draw, count, buffers); -} - - -static void -cell_set_index_buffer(struct pipe_context *pipe, - const struct pipe_index_buffer *ib) -{ - struct cell_context *cell = cell_context(pipe); - - if (ib) - memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); - else - memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); - - draw_set_index_buffer(cell->draw, ib); -} - - -void 
-cell_init_vertex_functions(struct cell_context *cell) -{ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_index_buffer = cell_set_index_buffer; - cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; - cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; - cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; - cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c deleted file mode 100644 index 777454479b1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_surface.h" -#include "cell_context.h" -#include "cell_surface.h" - - -void -cell_init_surface_functions(struct cell_context *cell) -{ - cell->pipe.resource_copy_region = util_resource_copy_region; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h deleted file mode 100644 index 9e58f329443..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <[email protected]> - */ - -#ifndef CELL_SURFACE_H -#define CELL_SURFACE_H - - -struct cell_context; - - -extern void -cell_init_surface_functions(struct cell_context *cell); - - -#endif /* CELL_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c deleted file mode 100644 index 946a7050e5f..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ /dev/null @@ -1,644 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - * Michel Dänzer <[email protected]> - * Brian Paul - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_transfer.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_state.h" -#include "cell_texture.h" - -#include "state_tracker/sw_winsys.h" - - - -static boolean -cell_resource_layout(struct pipe_screen *screen, - struct cell_resource *ct) -{ - struct pipe_resource *pt = &ct->base; - unsigned level; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - - ct->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - unsigned size; - unsigned w_tile, h_tile; - - assert(level < CELL_MAX_TEXTURE_LEVELS); - - /* width, height, rounded up to tile size */ - w_tile = align(width, TILE_SIZE); - h_tile = align(height, TILE_SIZE); - - ct->stride[level] = util_format_get_stride(pt->format, w_tile); - - ct->level_offset[level] = ct->buffer_size; - - size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile); - if (pt->target == PIPE_TEXTURE_CUBE) - size *= 6; - else - size *= depth; - - ct->buffer_size += size; - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - ct->data = align_malloc(ct->buffer_size, 16); - - return ct->data != NULL; -} - - -/** - * Texture layout for simple color buffers. 
- */ -static boolean -cell_displaytarget_layout(struct pipe_screen *screen, - struct cell_resource * ct) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - /* Round up the surface size to a multiple of the tile size? - */ - ct->dt = winsys->displaytarget_create(winsys, - ct->base.bind, - ct->base.format, - ct->base.width0, - ct->base.height0, - 16, - &ct->dt_stride ); - - return ct->dt != NULL; -} - -static struct pipe_resource * -cell_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templat) -{ - struct cell_resource *ct = CALLOC_STRUCT(cell_resource); - if (!ct) - return NULL; - - ct->base = *templat; - pipe_reference_init(&ct->base.reference, 1); - ct->base.screen = screen; - - /* Create both a displaytarget (linear) and regular texture - * (twiddled). Convert twiddled->linear at flush_frontbuffer time. - */ - if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!cell_displaytarget_layout(screen, ct)) - goto fail; - } - - if (!cell_resource_layout(screen, ct)) - goto fail; - - return &ct->base; - -fail: - if (ct->dt) { - struct sw_winsys *winsys = cell_screen(screen)->winsys; - winsys->displaytarget_destroy(winsys, ct->dt); - } - - FREE(ct); - - return NULL; -} - - -static void -cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt) -{ - struct cell_screen *screen = cell_screen(scrn); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(pt); - - if (ct->dt) { - /* display target */ - winsys->displaytarget_destroy(winsys, ct->dt); - } - else if (!ct->userBuffer) { - align_free(ct->data); - } - - FREE(ct); -} - - - -/** - * Convert image from linear layout to tiled layout. 4-byte pixels. 
- */ -static void -twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint src_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - src_stride /= 4; /* convert from bytes to pixels */ - - /* loop over dest tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of dest tile: */ - uint *tdst = dst + (it * w_t + jt) * tile_size2; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - const uint srci = it * tile_size + i; - const uint srcj = jt * tile_size + j; - ASSERT(srci < h); - ASSERT(srcj < w); - tdst[i * tile_size + j] = src[srci * src_stride + srcj]; - } - } - } - } -} - - -/** - * For Cell. Basically, rearrange the pixels/quads from this layout: - * +--+--+--+--+ - * |p0|p1|p2|p3|.... - * +--+--+--+--+ - * - * to this layout: - * +--+--+ - * |p0|p1|.... - * +--+--+ - * |p2|p3| - * +--+--+ - */ -static void -twiddle_tile(const uint *tileIn, uint *tileOut) -{ - int y, x; - - for (y = 0; y < TILE_SIZE; y+=2) { - for (x = 0; x < TILE_SIZE; x+=2) { - int k = 4 * (y/2 * TILE_SIZE/2 + x/2); - tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; - tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; - tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; - tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; - } - } -} - - -/** - * Convert image from tiled layout to linear layout. 4-byte pixels. 
- */ -static void -untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint dst_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - uint *tile_buf; - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - dst_stride /= 4; /* convert from bytes to pixels */ - - tile_buf = align_malloc(tile_size * tile_size * 4, 16); - - /* loop over src tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of src tile: */ - const uint *tsrc = src + (it * w_t + jt) * tile_size2; - - twiddle_tile(tsrc, tile_buf); - tsrc = tile_buf; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - uint dsti = it * tile_size + i; - uint dstj = jt * tile_size + j; - ASSERT(dsti < h); - ASSERT(dstj < w); - dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; - } - } - } - } - - align_free(tile_buf); -} - - -static struct pipe_surface * -cell_create_surface(struct pipe_context *ctx, - struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) -{ - struct cell_resource *ct = cell_resource(pt); - struct pipe_surface *ps; - - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->format = surf_tmpl->format; - ps->context = ctx; - ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); - ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); - /* XXX may 
need to override usage flags (see sp_texture.c) */ - ps->usage = surf_tmpl->usage; - ps->u.tex.level = surf_tmpl->u.tex.level; - ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; - ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - } - return ps; -} - - -static void -cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf) -{ - pipe_resource_reference(&surf->texture, NULL); - FREE(surf); -} - - -/** - * Create new pipe_transfer object. - * This is used by the user to put tex data into a texture (and get it - * back out for glGetTexImage). - */ -static struct pipe_transfer * -cell_get_transfer(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box) -{ - struct cell_resource *ct = cell_resource(resource); - struct cell_transfer *ctrans; - enum pipe_format format = resource->format; - - assert(resource); - assert(level <= resource->last_level); - - /* make sure the requested region is in the image bounds */ - assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); - - ctrans = CALLOC_STRUCT(cell_transfer); - if (ctrans) { - struct pipe_transfer *pt = &ctrans->base; - pipe_resource_reference(&pt->resource, resource); - pt->level = level; - pt->usage = usage; - pt->box = *box; - pt->stride = ct->stride[level]; - - ctrans->offset = ct->level_offset[level]; - - if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) { - unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE); - ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride; - } - else { - assert(box->z == 0); - } - - return pt; - } - return NULL; -} - - -static void -cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) -{ - struct cell_transfer *transfer = 
cell_transfer(t); - /* Effectively do the texture_update work here - if texture images - * needed post-processing to put them into hardware layout, this is - * where it would happen. For cell, nothing to do. - */ - assert (transfer->base.resource); - pipe_resource_reference(&transfer->base.resource, NULL); - FREE(transfer); -} - - -/** - * Return pointer to texture image data in linear layout. - */ -static void * -cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - - assert(transfer->resource); - - if (ct->mapped == NULL) { - ct->mapped = ct->data; - } - - - /* Better test would be resource->is_linear - */ - if (transfer->resource->target != PIPE_BUFFER) { - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - unsigned size; - - - /* - * Create a buffer of ordinary memory for the linear texture. - * This is the memory that the user will read/write. - */ - size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) * - util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE))); - - ctrans->map = align_malloc(size, 16); - if (!ctrans->map) - return NULL; /* out of memory */ - - if (transfer->usage & PIPE_TRANSFER_READ) { - /* Textures always stored twiddled, need to untwiddle the - * texture to make a linear version. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = (uint *) (ct->mapped + ctrans->offset); - uint *dst = ctrans->map; - untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, - dst, transfer->stride, src); - } - else { - // xxx fix - } - } - } - else { - unsigned stride = transfer->stride; - enum pipe_format format = pt->format; - unsigned blocksize = util_format_get_blocksize(format); - - ctrans->map = (ct->mapped + - ctrans->offset + - ctrans->base.box.y / util_format_get_blockheight(format) * stride + - ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize); - } - - - return ctrans->map; -} - - -/** - * Called when user is done reading/writing texture data. - * If new data was written, this is where we convert the linear data - * to tiled data. - */ -static void -cell_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - const uint stride = ct->stride[level]; - - if (!ct->mapped) { - assert(0); - return; - } - - if (pt->target != PIPE_BUFFER) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - /* The user wrote new texture data into the mapped buffer. - * We need to convert the new linear data into the twiddled/tiled format. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = ctrans->map; - uint *dst = (uint *) (ct->mapped + ctrans->offset); - twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src); - } - else { - // xxx fix - } - } - - align_free(ctrans->map); - } - else { - /* nothing to do */ - } - - ctrans->map = NULL; -} - - - -/* This used to be overriden by the co-state tracker, but really needs - * to be active with sw_winsys. - * - * Contrasting with llvmpipe and softpipe, this is the only place - * where we use the ct->dt display target in any real sense. - * - * Basically just untwiddle our local data into the linear - * displaytarget. - */ -static void -cell_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_resource *resource, - unsigned level, unsigned layer, - void *context_private) -{ - struct cell_screen *screen = cell_screen(_screen); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(resource); - - if (!ct->dt) - return; - - /* Need to untwiddle from our internal representation here: - */ - { - unsigned *map = winsys->displaytarget_map(winsys, ct->dt, - (PIPE_TRANSFER_READ | - PIPE_TRANSFER_WRITE)); - unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]); - - untwiddle_image_uint(u_minify(resource->width0, level), - u_minify(resource->height0, level), - TILE_SIZE, - map, - ct->dt_stride, - src); - - winsys->displaytarget_unmap(winsys, ct->dt); - } - - winsys->displaytarget_display(winsys, ct->dt, context_private); -} - - - -/** - * Create buffer which wraps user-space data. 
- */ -static struct pipe_resource * -cell_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned bind_flags) -{ - struct cell_resource *buffer; - - buffer = CALLOC_STRUCT(cell_resource); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ - buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->base.array_size = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_resource * -cell_resource_from_handle(struct pipe_screen *screen, - const struct pipe_resource *templat, - struct winsys_handle *handle) -{ - /* XXX todo */ - return NULL; -} - - -static boolean -cell_resource_get_handle(struct pipe_screen *scree, - struct pipe_resource *tex, - struct winsys_handle *handle) -{ - /* XXX todo */ - return FALSE; -} - - -void -cell_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->resource_create = cell_resource_create; - screen->resource_destroy = cell_resource_destroy; - screen->resource_from_handle = cell_resource_from_handle; - screen->resource_get_handle = cell_resource_get_handle; - screen->user_buffer_create = cell_user_buffer_create; - - screen->flush_frontbuffer = cell_flush_frontbuffer; -} - -void -cell_init_texture_transfer_funcs(struct cell_context *cell) -{ - cell->pipe.get_transfer = cell_get_transfer; - cell->pipe.transfer_destroy = cell_transfer_destroy; - cell->pipe.transfer_map = cell_transfer_map; - cell->pipe.transfer_unmap = cell_transfer_unmap; - - cell->pipe.transfer_flush_region = u_default_transfer_flush_region; - cell->pipe.transfer_inline_write = u_default_transfer_inline_write; - - cell->pipe.create_surface = cell_create_surface; - cell->pipe.surface_destroy = 
cell_surface_destroy; -} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h deleted file mode 100644 index bd8224b3b7b..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ /dev/null @@ -1,102 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_TEXTURE_H -#define CELL_TEXTURE_H - -#include "cell/common.h" - -struct cell_context; -struct pipe_resource; - - -/** - * Subclass of pipe_resource - */ -struct cell_resource -{ - struct pipe_resource base; - - unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; - unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; - - /** - * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET - * usage. - */ - struct sw_displaytarget *dt; - unsigned dt_stride; - - /** - * Malloc'ed data for regular textures, or a mapping to dt above. - */ - void *data; - boolean userBuffer; - - /* Size of the linear buffer?? - */ - unsigned long buffer_size; - - /** The buffer above, mapped. This is the memory from which the - * SPUs will fetch texels. This texture data is in the tiled layout. - */ - ubyte *mapped; -}; - - -struct cell_transfer -{ - struct pipe_transfer base; - - unsigned long offset; - void *map; -}; - - -/** cast wrapper */ -static INLINE struct cell_resource * -cell_resource(struct pipe_resource *pt) -{ - return (struct cell_resource *) pt; -} - - -/** cast wrapper */ -static INLINE struct cell_transfer * -cell_transfer(struct pipe_transfer *pt) -{ - return (struct cell_transfer *) pt; -} - - -extern void -cell_init_screen_texture_funcs(struct pipe_screen *screen); - -extern void -cell_init_texture_transfer_funcs(struct cell_context *cell); - -#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c deleted file mode 100644 index 37b71956482..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Vertex buffer code. The draw module transforms vertices to window - * coords, etc. and emits the vertices into buffer supplied by this module. - * When a vertex buffer is full, or we flush, we'll send the vertex data - * to the SPUs. - * - * Authors - * Brian Paul - */ - - -#include "cell_batch.h" -#include "cell_context.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_vbuf.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" - - -/** Allow vertex data to be inlined after RENDER command */ -#define ALLOW_INLINE_VERTS 1 - - -/** - * Subclass of vbuf_render because we need a cell_context pointer in - * a few places. 
- */ -struct cell_vbuf_render -{ - struct vbuf_render base; - struct cell_context *cell; - uint prim; /**< PIPE_PRIM_x */ - uint vertex_size; /**< in bytes */ - void *vertex_buffer; /**< just for debug, really */ - uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ - uint vertex_buffer_size; /**< size in bytes */ -}; - - -/** cast wrapper */ -static struct cell_vbuf_render * -cell_vbuf_render(struct vbuf_render *vbr) -{ - return (struct cell_vbuf_render *) vbr; -} - - - -static const struct vertex_info * -cell_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return &cvbr->cell->vertex_info; -} - - -static boolean -cell_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ - - assert(cvbr->vertex_buf == ~0); - cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); - cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; - cvbr->vertex_buffer_size = size; - cvbr->vertex_size = vertex_size; - - return cvbr->vertex_buffer != NULL; -} - - -static void -cell_vbuf_release_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - - /* - printf("%s vertex_buf = %u count = %u\n", - __FUNCTION__, cvbr->vertex_buf, vertices_used); - */ - - /* Make sure texture buffers aren't released until we're done rendering - * with them. 
- */ - cell_add_fenced_textures(cell); - - /* Tell SPUs they can release the vert buf */ - if (cvbr->vertex_buf != ~0U) { - STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) - cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); - release->opcode[0] = CELL_CMD_RELEASE_VERTS; - release->vertex_buf = cvbr->vertex_buf; - } - - cvbr->vertex_buf = ~0; - cell_flush_int(cell, 0x0); - - cvbr->vertex_buffer = NULL; -} - - -static void * -cell_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - - -static void -cell_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - /* do nothing */ -} - - - -static boolean -cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->prim = prim; - /*printf("cell_set_prim %u\n", prim);*/ - return TRUE; -} - - -static void -cell_vbuf_draw_elements(struct vbuf_render *vbr, - const ushort *indices, - uint nr_indices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - float xmin, ymin, xmax, ymax; - uint i; - uint nr_vertices = 0, min_index = ~0; - const void *vertices = cvbr->vertex_buffer; - const uint vertex_size = cvbr->vertex_size; - - for (i = 0; i < nr_indices; i++) { - if (indices[i] > nr_vertices) - nr_vertices = indices[i]; - if (indices[i] < min_index) - min_index = indices[i]; - } - nr_vertices++; - -#if 0 - /*if (min_index > 0)*/ - printf("%s min_index = %u\n", __FUNCTION__, min_index); -#endif - -#if 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", - nr_indices, nr_vertices); - printf(" "); - for (i = 0; i < nr_indices; i += 
3) { - printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); - } - printf("\n"); -#elif 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", - nr_indices, nr_vertices, - indices[0], indices[1], indices[2]); - printf("ind space = %u, vert space = %u, space = %u\n", - nr_indices * 2, - nr_vertices * 4 * cell->vertex_info.size, - cell_batch_free_space(cell)); -#endif - - /* compute x/y bounding box */ - xmin = ymin = 1e50; - xmax = ymax = -1e50; - for (i = min_index; i < nr_vertices; i++) { - const float *v = (float *) ((ubyte *) vertices + i * vertex_size); - if (v[0] < xmin) - xmin = v[0]; - if (v[0] > xmax) - xmax = v[0]; - if (v[1] < ymin) - ymin = v[1]; - if (v[1] > ymax) - ymax = v[1]; - } -#if 0 - printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); - fflush(stdout); -#endif - - if (cvbr->prim != PIPE_PRIM_TRIANGLES) - return; /* only render tris for now */ - - /* build/insert batch RENDER command */ - { - const uint index_bytes = ROUNDUP16(nr_indices * 2); - const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); - STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); - const uint batch_size = sizeof(struct cell_command_render) + index_bytes; - - struct cell_command_render *render - = (struct cell_command_render *) - cell_batch_alloc16(cell, batch_size); - - render->opcode[0] = CELL_CMD_RENDER; - render->prim_type = cvbr->prim; - - render->num_indexes = nr_indices; - render->min_index = min_index; - - /* append indices after render command */ - memcpy(render + 1, indices, nr_indices * 2); - - /* if there's room, append vertices after the indices, else leave - * vertices in the original/separate buffer. 
- */ - render->vertex_size = 4 * cell->vertex_info.size; - render->num_verts = nr_vertices; - if (ALLOW_INLINE_VERTS && - min_index == 0 && - vertex_bytes + 16 <= cell_batch_free_space(cell)) { - /* vertex data inlined, after indices, at 16-byte boundary */ - void *dst = cell_batch_alloc16(cell, vertex_bytes); - memcpy(dst, vertices, vertex_bytes); - render->inline_verts = TRUE; - render->vertex_buf = ~0; - } - else { - /* vertex data in separate buffer */ - render->inline_verts = FALSE; - ASSERT(cvbr->vertex_buf >= 0); - render->vertex_buf = cvbr->vertex_buf; - } - - render->xmin = xmin; - render->ymin = ymin; - render->xmax = xmax; - render->ymax = ymax; - } - -#if 0 - /* helpful for debug */ - cell_flush_int(cell, CELL_FLUSH_WAIT); -#endif -} - - -static void -cell_vbuf_destroy(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->cell->vbuf_render = NULL; - FREE(cvbr); -} - - -/** - * Initialize the post-transform vertex buffer information for the given - * context. - */ -void -cell_init_vbuf(struct cell_context *cell) -{ - assert(cell->draw); - - cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); - - /* The max number of indexes is what can fix into a batch buffer, - * minus the render and release-verts commands. 
- */ - cell->vbuf_render->base.max_indices - = (CELL_BUFFER_SIZE - - sizeof(struct cell_command_render) - - sizeof(struct cell_command_release_verts)) - / sizeof(ushort); - cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; - - cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; - cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; - cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices; - cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices; - cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; - cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements; - cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; - cell->vbuf_render->base.destroy = cell_vbuf_destroy; - - cell->vbuf_render->cell = cell; -#if 1 - cell->vbuf_render->vertex_buf = ~0; -#endif - - cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); -} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h deleted file mode 100644 index d265cbf7701..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_VBUF_H -#define CELL_VBUF_H - - -struct cell_context; - -extern void -cell_init_vbuf(struct cell_context *cell); - - -#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index 9cba537d9eb..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <inttypes.h> -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "../auxiliary/draw/draw_context.h" -#include "../auxiliary/draw/draw_private.h" - -#include "cell_context.h" -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of columns to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c spe_release_register, by this function. - * - * \note - * This function requires that four temporary registers are available on entry.
- */ -static void -emit_matrix_transpose(struct spe_function *p, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = spe_allocate_available_register(p); - int shuf_lo = spe_allocate_available_register(p); - int t1 = spe_allocate_available_register(p); - int t2 = spe_allocate_available_register(p); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3*16); - spe_lqd(p, shuf_lo, shuf_ptr, 4*16); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers do the byte-offset to - * qword-offset conversion for you), so the byte-offset needs to be divided by - * 16. - * - * NOTE(review): the calls below pass byte offsets (N * 16); presumably - * spe_stqd performs the division itself -- confirm against its definition. - * - * The case labels have no break, so control falls through: count == N - * stores columns N-1 down to 0. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3 * 16); - case 3: - spe_stqd(p, col2, dest_ptr, 2 * 16); - case 2: - spe_stqd(p, col1, dest_ptr, 1 * 16); - case 1: - spe_stqd(p, col0, dest_ptr, 0 * 16); - } - - - /* Release all of the temporary registers used. col0, col1, col2 and col3 - * alias row1, row3, t1 and row0 (through t3), and t4 aliases row2, so the - * eight calls below cover the four row registers passed in plus the four - * registers allocated at the top of this function.
- */ - spe_release_register(p, col0); - spe_release_register(p, col1); - spe_release_register(p, col2); - spe_release_register(p, col3); - spe_release_register(p, shuf_hi); - spe_release_register(p, shuf_lo); - spe_release_register(p, t2); - spe_release_register(p, t4); -} - - -#if 0 -/* This appears not to be used currently; it is compiled out by the - * enclosing #if 0. - * - * Appends code that loads four quadwords from in_ptr, starting at quadword - * index offset[0] (which is then advanced by 4). For 1- and 2-byte - * components the four registers are shuffled with a pattern loaded from - * shuf_ptr. The result is transposed into out_ptr by - * emit_matrix_transpose(); when the format has fewer than four components - * the remaining quadword slots of out_ptr are filled from float_zero - * (slots 1 and 2) and float_one (slot 3). - * - * NOTE(review): only v0 goes through the type conversion in the second - * switch; v1, v2 and v3 reach the transpose unconverted -- confirm whether - * that is intended. - */ -static void -emit_fetch(struct spe_function *p, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = spe_allocate_available_register(p); - int v1 = spe_allocate_available_register(p); - int v2 = spe_allocate_available_register(p); - int v3 = spe_allocate_available_register(p); - int tmp = spe_allocate_available_register(p); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); - spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); - spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); - spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case
PIPE_FORMAT_TYPE_UNORM: /* NOTE(review): (unsigned) scale_unsigned converts the float value to an integer (0 for every scale assigned above) instead of taking its bit pattern, so both immediates are 0 -- presumably the IEEE-754 bits of the scale were intended; the same applies to scale_signed in the SNORM case. Confirm before re-enabling. */ - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = spe_allocate_available_register(p); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = spe_allocate_available_register(p); - spe_il(p, float_zero, 0); - } - } - - spe_release_register(p, tmp); - - emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - /* No break between the cases: count == 1 writes slots 1, 2 and 3, count == 2 writes slots 2 and 3, count == 3 writes slot 3 only. */ switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1 * 16); - case 2: - spe_stqd(p, float_zero, out_ptr, 2 * 16); - case 3: - spe_stqd(p, float_one, out_ptr, 3 * 16); - } - - if (float_zero != -1) { - spe_release_register(p, float_zero); - } - - if (float_one != -1) { - spe_release_register(p, float_one); - } -} -#endif - - /** - * Build the per-attribute vertex fetch code for \c draw. - * - * The generator body is compiled out with #if 0, so as written this - * function only executes assert(0). - * - * The disabled code emits one fetch function per distinct attribute - * source format into cell->attrib_fetch and records, for every attribute, - * the entry offset of its fetch function in cell->attrib_fetch_offsets[]. - */ -void cell_update_vertex_fetch(struct draw_context *draw) -{ -#if 0 - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_MAX_ATTRIBS]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format.
- */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). - */ - spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = spe_allocate_register(p, 3); - in_ptr = spe_allocate_register(p, 4); - shuf_ptr = spe_allocate_register(p, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - * - * NOTE(review): this adds (address & 0x0f) to p->store instead of - * the distance to the next boundary, which does not yield a - * 16-byte aligned address in general, and it moves p->store, the - * base that the entry offsets above are measured from -- confirm - * before re-enabling this code. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } -#else - assert(0); -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c deleted file mode 100644 index 3d389d6ea36..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved.
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file cell_vertex_shader.c - * Vertex shader interface routines for Cell. - * - * \author Ian Romanick <[email protected]> - */ - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_math.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_batch.h" - -#include "cell/common.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertex cache needs to be flushed.
- */ -void -cell_vertex_shader_queue_flush(struct draw_context *draw) -{ -#if 0 /* Disabled: as built, this function only executes the assert(0) in the #else branch. The code kept below queues the attribute-fetch code, the per-attribute array info, the viewport and the uniforms pointer through cell_batch_alloc(), flushes the batch, and then issues CELL_CMD_VS_EXECUTE for the queued vertices in groups of SPU_VERTS_PER_BATCH. */ - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct cell_command_vs *const vs = &cell_global.command[0].vs; - uint64_t *batch; - struct cell_array_info *array_info; - unsigned i, j; - struct cell_attribute_fetch_code *cf; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - draw_update_vertex_fetch(draw); - cell_update_vertex_fetch(draw); - - - /* Each command is a 64-bit opcode word (batch[0]) followed by its payload starting at batch[1]. */ batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); - batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; - cf = (struct cell_attribute_fetch_code *) (&batch[1]); - cf->base = (uint64_t) cell->attrib_fetch.store; - cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr - - (void *) cell->attrib_fetch.store)); - - - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format format = draw->vertex_element[i].src_format; - const unsigned count = ((pf_size_x(format) != 0) - + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) - + (pf_size_w(format) != 0)); - const unsigned size = pf_size_x(format) * count; - - batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); - - batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; - - array_info = (struct cell_array_info *) &batch[1]; - assert(draw->vertex_fetch.src_ptr[i] != NULL); - array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; - array_info->attr = i; - array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->size = size; - array_info->function_offset = cell->attrib_fetch_offsets[i]; - } - - batch = cell_batch_alloc(cell, sizeof(batch[0]) - + sizeof(struct pipe_viewport_state)); - batch[0] = CELL_CMD_STATE_VIEWPORT; - (void) memcpy(&batch[1], &draw->viewport, - sizeof(struct pipe_viewport_state)); - - { - uint64_t uniforms = (uintptr_t) draw->user.constants; - - batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); - batch[0] = CELL_CMD_STATE_UNIFORMS; - batch[1] = uniforms; - } - -
cell_batch_flush(cell); - - vs->opcode = CELL_CMD_VS_EXECUTE; - vs->nr_attrs = draw->vertex_fetch.nr_attrs; - - (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); - vs->nr_planes = draw->nr_planes; - - for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { - const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); - - for (j = 0; j < n; j++) { - vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - /* Pad a short batch up to SPU_VERTS_PER_BATCH entries by repeating elts[0]. NOTE(review): vOut is still taken from queue[i + j] with j >= n, i.e. from an index at or past queue_nr whenever this loop runs -- confirm the queue is sized for that before re-enabling. */ for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { - vs->elts[j] = vs->elts[0]; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - vs->num_elts = n; - send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - - cell_flush_int(cell, CELL_FLUSH_WAIT); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -#else - assert(0); -#endif -} |