summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2008-09-05 13:55:02 -0600
committerBrian Paul <[email protected]>2008-09-11 10:39:28 -0600
commit0e79e474de164a765b9759398c83b6bfa16a0012 (patch)
tree6f5bd02130c27f0cfd70dd1a8667b9416bcb1751
parentbb5becf1e289b2c9240d98299e9447a9673da9fc (diff)
cell: comments, etc.
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c28
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c44
3 files changed, 52 insertions, 25 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
index 53ae3aa50e7..705867107bf 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to perform Z testing. Four Z values are tested at once.
* \param dsa Current depth-test state
* \param f Function to which code should be appended
- * \param m Mask of allocated / free SPE registers
* \param mask Index of register to contain depth-pass mask
* \param stored Index of register containing values from depth buffer
* \param calculated Index of register containing per-fragment depth values
@@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to apply the stencil operation (after testing).
* \note Emits a maximum of 5 instructions.
*
* \warning
@@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f,
spe_il(f, result, ref);
break;
case PIPE_STENCIL_OP_INCR:
+ /* clamp = [0xff, 0xff, 0xff, 0xff] */
spe_il(f, clamp, 0x0ff);
+ /* result[i] = in[i] + 1 */
spe_ai(f, result, in, 1);
+ /* clamp_mask[i] = (result[i] > 0xff) */
spe_clgti(f, clamp_mask, result, 0x0ff);
+ /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_DECR:
@@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f,
/**
+ * Generate code to do stencil test. Four pixels are tested at once.
* \param dsa Depth / stencil test state
* \param face 0 for front face, 1 for back face
* \param f Function to append instructions to
- * \param reg_mask Mask of allocated registers
* \param mask Register containing mask of fragments passing the
* alpha test
* \param depth_mask Register containing mask of fragments passing the
@@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
switch (dsa->stencil[face].func) {
case PIPE_FUNC_NEVER:
- spe_il(f, stencil_mask, 0);
+ spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
break;
case PIPE_FUNC_NOTEQUAL:
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
+ /* stencil_mask[i] = (stored[i] == ref) */
spe_ceqi(f, stencil_mask, stored, ref);
break;
@@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GREATER:
+ complement = TRUE;
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
break;
@@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
+ /* tmp[i] = (stored[i] == ref) */
spe_ceqi(f, tmp, stored, ref);
+ /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
spe_or(f, stencil_mask, stencil_mask, tmp);
break;
@@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
* up to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Allocate registers for the function's input parameters. Cleverly (and
@@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
spe_selb(f, depth, depth, zvals, mask);
}
- spe_bi(f, 0, 0, 0);
+ spe_bi(f, 0, 0, 0); /* return from function call */
#if 0
@@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
* + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
* make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
const int frag[4] = {
@@ -1144,7 +1155,8 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
}
-int PC_OFFSET(const struct spe_function *f, const void *d)
+static int
+PC_OFFSET(const struct spe_function *f, const void *d)
{
const intptr_t pc = (intptr_t) f->csr;
const intptr_t ea = ~0x0f & (intptr_t) d;
@@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f,
* bytes (equiv. to 8 instructions) are needed for data storage. Round up
* to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Pixel colors in framebuffer format in AoS layout.
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 2ece0250f6f..566df7f59e3 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw)
/* Each fetch function can be a maximum of 34 instructions (note: this is
- * actually a slight over-estimate). That means (34 * 4) = 136 bytes
- * each maximum.
+ * actually a slight over-estimate).
*/
- spe_init_func(p, 136 * unique_attr_formats);
+ spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
/* Allocate registers for the function's input parameters.
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index c0a729b3d2c..db887352265 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -35,8 +35,17 @@
#define ZERO 0x80
+
+/**
+ * Get a "quad" of four fragment Z/stencil values from the given tile.
+ * \param tile the tile of Z/stencil values
+ * \param x, y location of the quad in the tile, in pixels
+ * \param depth_format format of the tile's data
+ * \param detph returns four depth values
+ * \param stencil returns four stencil values
+ */
static void
-read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
+read_ds_quad(tile_t *tile, unsigned x, unsigned y,
enum pipe_format depth_format, qword *depth,
qword *stencil)
{
@@ -45,14 +54,13 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
switch (depth_format) {
case PIPE_FORMAT_Z16_UNORM: {
- qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
+ qword *ptr = (qword *) &tile->us8[iy][ix / 2];
const qword shuf_vec = (qword) {
ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
};
-
/* At even X values we want the first 4 shorts, and at odd X values we
* want the second 4 shorts.
*/
@@ -65,18 +73,16 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
-
case PIPE_FORMAT_Z32_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword *ptr = (qword *) &tile->ui4[iy][ix];
*depth = *ptr;
*stencil = si_il(0);
break;
}
-
case PIPE_FORMAT_Z24S8_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword *ptr = (qword *) &tile->ui4[iy][ix];
qword mask = si_fsmbi(0xEEEE);
*depth = si_rotmai(si_and(*ptr, mask), -8);
@@ -84,16 +90,14 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
-
case PIPE_FORMAT_S8Z24_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword *ptr = (qword *) &tile->ui4[iy][ix];
*depth = si_and(*ptr, si_fsmbi(0x7777));
*stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
break;
}
-
default:
ASSERT(0);
break;
@@ -101,6 +105,14 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
}
+/**
+ * Put a quad of Z/stencil values into a tile.
+ * \param tile the tile of Z/stencil values to write into
+ * \param x, y location of the quad in the tile, in pixels
+ * \param depth_format format of the tile's data
+ * \param detph depth values to store
+ * \param stencil stencil values to store
+ */
static void
write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
enum pipe_format depth_format,
@@ -124,14 +136,12 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
-
case PIPE_FORMAT_Z32_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
*ptr = depth;
break;
}
-
case PIPE_FORMAT_Z24S8_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
qword mask = si_fsmbi(0xEEEE);
@@ -141,7 +151,6 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
-
case PIPE_FORMAT_S8Z24_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
qword mask = si_fsmbi(0x7777);
@@ -151,7 +160,6 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
-
default:
ASSERT(0);
break;
@@ -159,6 +167,14 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
}
+/**
+ * Do depth/stencil/alpha test for a "quad" of 4 fragments.
+ * \param x,y location of quad within tile
+ * \param frag_mask indicates which fragments are "alive"
+ * \param frag_depth four fragment depth values
+ * \param frag_alpha four fragment alpha values
+ * \param facing front/back facing for four fragments (1=front, 0=back)
+ */
qword
spu_do_depth_stencil(int x, int y,
qword frag_mask, qword frag_depth, qword frag_alpha,