summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2008-03-19 17:29:39 -0700
committer Ian Romanick <[email protected]> 2008-03-20 17:37:26 -0700
commit 2b21bde3b1fa6fe357a3a5adc6249e89d6915524 (patch)
tree 8a6edb1d40770837580e9d3de553c001947516d3 /src/gallium/drivers/cell/spu
parent 3b3774b1227743147159676795b542c0eb7c2bdf (diff)
cell: Use code-gen for alpha blend
So far this is only tested when GL_BLEND is disabled.
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r-- src/gallium/drivers/cell/spu/Makefile | 1
-rw-r--r-- src/gallium/drivers/cell/spu/spu_blend.c | 62
-rw-r--r-- src/gallium/drivers/cell/spu/spu_blend.h | 37
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.c | 50
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.h | 17
-rw-r--r-- src/gallium/drivers/cell/spu/spu_tri.c | 56
6 files changed, 97 insertions, 126 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index 115ca8cd901..8e83610790e 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -17,7 +17,6 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
SOURCES = \
spu_main.c \
- spu_blend.c \
spu_dcache.c \
spu_per_fragment_op.c \
spu_render.c \
diff --git a/src/gallium/drivers/cell/spu/spu_blend.c b/src/gallium/drivers/cell/spu/spu_blend.c
deleted file mode 100644
index 23ec0eeb451..00000000000
--- a/src/gallium/drivers/cell/spu/spu_blend.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "spu_main.h"
-#include "spu_blend.h"
-#include "spu_colorpack.h"
-
-
-void
-blend_quad(uint itx, uint ity, vector float colors[4])
-{
- /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */
- vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]);
- vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]);
- vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]);
- vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]);
-
- vector float alpha00 = spu_splats(spu_extract(colors[0], 3));
- vector float alpha01 = spu_splats(spu_extract(colors[1], 3));
- vector float alpha10 = spu_splats(spu_extract(colors[2], 3));
- vector float alpha11 = spu_splats(spu_extract(colors[3], 3));
-
- vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00);
- vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01);
- vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10);
- vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11);
-
- colors[0] = spu_add(spu_mul(colors[0], alpha00),
- spu_mul(fbc00, one_minus_alpha00));
- colors[1] = spu_add(spu_mul(colors[1], alpha01),
- spu_mul(fbc01, one_minus_alpha01));
- colors[2] = spu_add(spu_mul(colors[2], alpha10),
- spu_mul(fbc10, one_minus_alpha10));
- colors[3] = spu_add(spu_mul(colors[3], alpha11),
- spu_mul(fbc11, one_minus_alpha11));
-}
-
diff --git a/src/gallium/drivers/cell/spu/spu_blend.h b/src/gallium/drivers/cell/spu/spu_blend.h
deleted file mode 100644
index 2b594b578b4..00000000000
--- a/src/gallium/drivers/cell/spu/spu_blend.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef SPU_BLEND_H
-#define SPU_BLEND_H
-
-
-extern void
-blend_quad(uint itx, uint ity, vector float colors[4]);
-
-
-#endif /* SPU_BLEND_H */
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 937962285d0..41bebf5362b 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -61,6 +61,25 @@ static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX]
static unsigned char depth_stencil_code_buffer[4 * 64]
ALIGN16_ATTRIB;
+static unsigned char fb_blend_code_buffer[4 * 64]
+ ALIGN16_ATTRIB;
+
+static struct spu_blend_results
+default_blend(qword frag_r, qword frag_g, qword frag_b, qword frag_a,
+ qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
+ qword frag_mask)
+{
+ struct spu_blend_results result;
+
+ result.r = si_selb(pixel_r, frag_r, frag_mask);
+ result.g = si_selb(pixel_g, frag_g, frag_mask);
+ result.b = si_selb(pixel_b, frag_b, frag_mask);
+ result.a = si_selb(pixel_a, frag_a, frag_mask);
+
+ return result;
+}
+
+
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
@@ -246,14 +265,31 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
static void
-cmd_state_blend(const struct pipe_blend_state *state)
+cmd_state_blend(const struct cell_command_blend *state)
{
if (Debug)
printf("SPU %u: BLEND: enabled %d\n",
spu.init.id,
- state->blend_enable);
+ (state->size != 0));
+
+ ASSERT_ALIGN16(state->base);
- memcpy(&spu.blend, state, sizeof(*state));
+ if (state->size != 0) {
+ mfc_get(fb_blend_code_buffer,
+ (unsigned int) state->base, /* src */
+ ROUNDUP16(state->size),
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+ spu.blend = (blend_func) fb_blend_code_buffer;
+ spu.read_fb = state->read_fb;
+ } else {
+ /* If there is no code, use the default;
+ */
+ spu.blend = default_blend;
+ spu.read_fb = FALSE;
+ }
}
@@ -441,9 +477,8 @@ cmd_batch(uint opcode)
pos += 1;
break;
case CELL_CMD_STATE_BLEND:
- cmd_state_blend((struct pipe_blend_state *)
- &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
+ cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8);
break;
case CELL_CMD_STATE_DEPTH_STENCIL:
cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
@@ -587,6 +622,9 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
+
+ spu.blend = default_blend;
+ spu.read_fb = FALSE;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 444e2186452..56d0968676b 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -67,6 +67,18 @@ typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
qword frag_alpha, qword facing);
+struct spu_blend_results {
+ qword r;
+ qword g;
+ qword b;
+ qword a;
+};
+
+typedef struct spu_blend_results (*blend_func)(
+ qword frag_r, qword frag_g, qword frag_b, qword frag_a,
+ qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
+ qword frag_mask);
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -93,7 +105,10 @@ struct spu_global
boolean read_depth;
boolean read_stencil;
frag_test_func frag_test;
- struct pipe_blend_state blend;
+
+ boolean read_fb;
+ blend_func blend;
+
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct cell_command_texture texture;
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 81823f24633..c9f8cadcda3 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -29,10 +29,10 @@
* Triangle rendering within a tile.
*/
+#include <transpose_matrix4x4.h>
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "pipe/p_util.h"
-#include "spu_blend.h"
#include "spu_colorpack.h"
#include "spu_main.h"
#include "spu_texture.h"
@@ -326,27 +326,45 @@ emit_quad( int x, int y, mask_t mask )
eval_coeff(1, (float) x, (float) y, colors);
}
-#if 1
- if (spu.blend.blend_enable)
- blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors);
-#endif
- if (spu_extract(mask, 0))
- spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle);
- if (spu_extract(mask, 1))
- spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle);
- if (spu_extract(mask, 2))
- spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle);
- if (spu_extract(mask, 3))
- spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle);
+ /* Read the current framebuffer values.
+ *
+ * Ignore read_fb for now. In the future we can use this to avoid
+ * reading the framebuffer if read_fb is false and the fragment mask is
+ * all 0xffffffff. This is the common case, so it is probably worth
+ * the effort. We'll have to profile to determine whether or not the
+ * extra conditional branches hurt overall performance.
+ */
+ vec_float4 aos_pix[4] = {
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
+ };
-#if 0
- /* SIMD_Z with swizzled color buffer (someday) */
- vector unsigned int uicolors = *((vector unsigned int *) &colors);
- spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask);
-#endif
- }
+ qword soa_pix[4];
+ qword soa_frag[4];
+ /* Convert pixel and fragment data from AoS to SoA format.
+ */
+ _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
+ _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
+
+ const struct spu_blend_results result =
+ (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
+ soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
+ (qword) mask);
+
+
+ /* Convert final pixel data from SoA to AoS format.
+ */
+ _transpose_matrix4x4(aos_pix, (const vec_float4 *) &result);
+
+ spu.ctile.ui[iy+0][ix+0] = spu_pack_color_shuffle(aos_pix[0], shuffle);
+ spu.ctile.ui[iy+0][ix+1] = spu_pack_color_shuffle(aos_pix[1], shuffle);
+ spu.ctile.ui[iy+1][ix+0] = spu_pack_color_shuffle(aos_pix[2], shuffle);
+ spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(aos_pix[3], shuffle);
+ }
#endif
}