diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/cell/common.h | 31 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_emit.c | 77 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_command.c | 111 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_command.h | 32 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.h | 4 |
8 files changed, 216 insertions, 64 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index a670ed3c6ec..98554d7f521 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -121,11 +121,6 @@ #define CELL_DEBUG_CMD (1 << 5) #define CELL_DEBUG_CACHE (1 << 6) -/** Max instructions for doing per-fragment operations */ -#define SPU_MAX_FRAGMENT_OPS_INSTS 128 - - - #define CELL_FENCE_IDLE 0 #define CELL_FENCE_EMITTED 1 #define CELL_FENCE_SIGNALLED 2 @@ -153,18 +148,36 @@ struct cell_command_fence /** * Command to specify per-fragment operations state and generated code. - * Note that the dsa, blend, blend_color fields are really only needed + * Note that this is a variant-length structure, allocated with as + * much memory as needed to hold the generated code; the "code" + * field *must* be the last field in the structure. Also, the entire + * length of the structure (including the variant code field) must be + * a multiple of 8 bytes; we require that this structure itself be + * a multiple of 8 bytes, and that the generated code also be a multiple + * of 8 bytes. + * + * Also note that the dsa, blend, blend_color fields are really only needed * for the fallback/C per-pixel code. They're not used when we generate - * dynamic SPU fragment code (which is the normal case). + * dynamic SPU fragment code (which is the normal case), and will eventually + * be removed from this structure. */ struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + + /* Fields for the fallback case */ struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; struct pipe_blend_color blend_color; - unsigned code_front[SPU_MAX_FRAGMENT_OPS_INSTS]; - unsigned code_back[SPU_MAX_FRAGMENT_OPS_INSTS]; + + /* Fields for the generated SPU code */ + unsigned total_code_size; + unsigned front_code_index; + unsigned back_code_index; + /* this field has variant length, and must be the last field in + * the structure + */ + unsigned code[0]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 82336d6635a..2c64eb1bccd 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1776,7 +1776,10 @@ gen_stencil_depth_test(struct spe_function *f, * \param cell the rendering context (in) * \param facing whether the generated code is for front-facing or * back-facing fragments - * \param f the generated function (out) + * \param f the generated function (in/out); on input, the function + * must already have been initialized. On exit, whatever + * instructions within the generated function have had + * the fragment ops appended. */ void cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f) @@ -1808,8 +1811,6 @@ cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - if (cell->debug_flags & CELL_DEBUG_ASM) { spe_print_code(f, true); spe_indent(f, 8); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 031b27f11f6..0a0af81f53f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -76,30 +76,86 @@ lookup_fragment_ops(struct cell_context *cell) */ if (!ops) { struct spe_function spe_code_front, spe_code_back; + unsigned int facing_dependent, total_code_size; if (0) debug_printf("**** Create New Fragment Ops\n"); - /* Prepare the buffer that will hold the generated code. */ - spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* Prepare the buffer that will hold the generated code. The + * "0" passed in for the size means that the SPE code will + * use a default size. + */ + spe_init_func(&spe_code_front, 0); + spe_init_func(&spe_code_back, 0); - /* generate new code. Always generate new code for both front-facing + /* Generate new code. Always generate new code for both front-facing * and back-facing fragments, even if it's the same code in both * cases. */ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - /* alloc new fragment ops command */ - ops = CALLOC_STRUCT(cell_command_fragment_ops); + /* Make sure the code is a multiple of 8 bytes long; this is + * required to ensure that the dual pipe instruction alignment + * is correct. It's also important for the SPU unpacking, + * which assumes 8-byte boundaries. + */ + unsigned int front_code_size = spe_code_size(&spe_code_front); + while (front_code_size % 8 != 0) { + spe_lnop(&spe_code_front); + front_code_size = spe_code_size(&spe_code_front); + } + unsigned int back_code_size = spe_code_size(&spe_code_back); + while (back_code_size % 8 != 0) { + spe_lnop(&spe_code_back); + back_code_size = spe_code_size(&spe_code_back); + } + /* Determine whether the code we generated is facing-dependent, by + * determining whether the generated code is different for the front- + * and back-facing fragments. + */ + if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { + /* Code is identical; only need one copy. */ + facing_dependent = 0; + total_code_size = front_code_size; + } + else { + /* Code is different for front-facing and back-facing fragments. + * Need to send both copies. + */ + facing_dependent = 1; + total_code_size = front_code_size + back_code_size; + } + + /* alloc new fragment ops command. Note that this structure + * has variant length based on the total code size required. + */ + ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); /* populate the new cell_command_fragment_ops object */ ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front)); - memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back)); + ops->total_code_size = total_code_size; + ops->front_code_index = 0; + memcpy(ops->code, spe_code_front.store, front_code_size); + if (facing_dependent) { + /* We have separate front- and back-facing code. Append the + * back-facing code to the buffer. Be careful because the code + * size is in bytes, but the buffer is of unsigned elements. + */ + ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); + memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); + } + else { + /* Use the same code for front- and back-facing fragments */ + ops->back_code_index = ops->front_code_index; + } + + /* Set the fields for the fallback case. Note that these fields + * (and the whole fallback case) will eventually go away. + */ ops->dsa = *cell->depth_stencil; ops->blend = *cell->blend; + ops->blend_color = cell->blend_color; /* insert cell_command_fragment_ops object into keymap/cache */ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); @@ -200,9 +256,10 @@ cell_emit_state(struct cell_context *cell) CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { struct cell_command_fragment_ops *fops, *fops_cmd; - fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); + /* Note that cell_command_fragment_ops is a variant-sized record */ fops = lookup_fragment_ops(cell); - memcpy(fops_cmd, fops, sizeof(*fops)); + fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd) + fops->total_code_size); + memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 18969005b02..9cba537d9eb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p, } +#if 0 +/* This appears to not be used currently */ static void emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, @@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p, spe_release_register(p, float_one); } } +#endif void cell_update_vertex_fetch(struct draw_context *draw) diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d5faf4e3aaa..8500d19754e 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -210,45 +210,72 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { - static int warned = 0; - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info (for fallback case only) */ + + /* Copy state info (for fallback case only - this will eventually + * go away when the fallback case goes away) + */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); - /* Parity twist! For now, always use the fallback code by default, - * only switching to codegen when specifically requested. This - * allows us to develop freely without risking taking down the - * branch. - * - * Later, the parity of this check will be reversed, so that - * codegen is *always* used, unless we specifically indicate that - * we don't want it. - * - * Eventually, the option will be removed completely, because in - * final code we'll always use codegen and won't even provide the - * raw state records that the fallback code requires. + /* Make sure the SPU knows which buffers it's expected to read when + * it's told to pull tiles. */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { - spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front; - spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back; - } - else { - /* otherwise, the default fallback code remains in place */ + spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); + + /* If we're forcing the fallback code to be used (for debug purposes), + * install that. Otherwise install the incoming SPU code. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { + static unsigned int warned = 0; if (!warned) { fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); warned = 1; } + /* The following two lines aren't really necessary if you + * know the debug flags won't change during a run, and if you + * know that the function pointers are initialized correctly. + * We set them here to allow a person to change the debug + * flags during a run (from inside a debugger). + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; } - spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); -} + /* Make sure the SPU code buffer is large enough to hold the incoming code. + * Note that we *don't* use align_malloc() and align_free(), because + * those utility functions are *not* available in SPU code. + * */ + if (spu.fragment_ops_code_size < fops->total_code_size) { + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu.fragment_ops_code_size = fops->total_code_size; + spu.fragment_ops_code = malloc(fops->total_code_size); + if (spu.fragment_ops_code == NULL) { + /* Whoops. */ + fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + } + /* Copy the SPU code from the command buffer to the spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); + + /* Set the pointers for the front-facing and back-facing fragments + * to the specified offsets within the code. Note that if the + * front-facing and back-facing code are the same, they'll have + * the same offset. + */ + spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; + spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; +} static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) @@ -588,7 +615,8 @@ cmd_batch(uint opcode) struct cell_command_fragment_ops *fops = (struct cell_command_fragment_ops *) &buffer[pos]; cmd_state_fragment_ops(fops); - pos += sizeof(*fops) / 8; + /* This is a variant-sized command */ + pos += (sizeof(*fops) + fops->total_code_size)/ 8; } break; case CELL_CMD_STATE_FRAGMENT_PROGRAM: @@ -756,3 +784,32 @@ command_loop(void) if (spu.init.debug_flags & CELL_DEBUG_CACHE) spu_dcache_report(); } + +/* Initialize this module; we manage the fragment ops buffer here. */ +void +spu_command_init(void) +{ + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + + /* Set up the basic empty buffer for code-gen'ed fragment ops */ + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; +} + +void +spu_command_close(void) +{ + /* Deallocate the code-gen buffer for fragment ops, and reset the + * fragment ops functions to their initial setting (just to leave + * things in a good state). + */ + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu_command_init(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h index 853e9aa5498..83dcdade288 100644 --- a/src/gallium/drivers/cell/spu/spu_command.h +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -1,7 +1,35 @@ - - +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ extern void command_loop(void); +extern void +spu_command_init(void); +extern void +spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 7033f6037df..97c86d194da 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,17 +58,8 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); - - /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function - * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; } - - /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -91,11 +82,11 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0); + ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); one_time_init(); + spu_command_init(); D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); @@ -120,5 +111,7 @@ main(main_param_t speid, main_param_t argp) command_loop(); + spu_command_close(); + return 0; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 24cf7d77ce1..33767e7c51d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -169,8 +169,8 @@ struct spu_global ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; /** Current fragment ops machine code, at 8-byte boundary */ - uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; - uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; + uint *fragment_ops_code; + uint fragment_ops_code_size; /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ spu_fragment_ops_func fragment_ops[2]; |