summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Christian König <[email protected]> 2010-11-25 19:37:12 +0100
committer: Christian König <[email protected]> 2010-11-25 19:37:12 +0100
commit: ed8b767a8e09cff4d98a44cdc07b08f1b322c4d3 (patch)
tree: cea881901c8da8637afb7ab96102bbd0c72febd9 /src
parent: c9e10c666adc64f6c5dfb04422560508f115aa54 (diff)
[g3dvl] also use four elements on right side multiplication
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/vl/vl_idct.c 19
1 files changed, 11 insertions, 8 deletions
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 497e67e94d3..c1550cb365a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -167,14 +167,19 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
- ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
- ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+ if(fetch4[1]) {
+ ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
+ ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
+ } else {
+ ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
+ ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+ }
for(side = 0; side < 2; ++side) {
for(i = 0; i < 2; ++i) {
if(fetch4[side]) {
ureg_TEX(shader, m[i][side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
- ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), ureg_src(t_tc[side]), step[side]);
+ ureg_MOV(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), step[side]);
} else for(j = 0; j < 4; ++j) {
/* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
@@ -256,13 +261,13 @@ create_matrix_frag_shader(struct vl_idct *idct)
start[1] = ureg_imm1f(shader, 0.0f);
step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
- step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
+ step[1] = ureg_imm1f(shader, 4.0f / BLOCK_WIDTH);
sampler[0] = ureg_DECL_sampler(shader, 1);
sampler[1] = ureg_DECL_sampler(shader, 0);
fetch4[0] = false;
- fetch4[1] = false;
+ fetch4[1] = true;
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
@@ -380,8 +385,6 @@ init_buffers(struct vl_idct *idct)
template.flags = 0;
idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-
- template.width0 = 8;
idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
template.format = idct->destination->format;
@@ -535,7 +538,7 @@ init_constants(struct vl_idct *idct)
f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
for(i = 0; i < BLOCK_HEIGHT; ++i)
for(j = 0; j < BLOCK_WIDTH; ++j)
- f[i * pitch * 4 + j * 4] = const_matrix[i][j];
+ f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
idct->pipe->transfer_destroy(idct->pipe, buf_transfer);