summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/vl
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-05-29 19:53:45 +0200
committerChristian König <[email protected]>2011-05-29 20:07:57 +0200
commit912dc8ff09cd7c28926762c2e562de5a99d3e27a (patch)
treebbb911a8722e8a7f49699517db8de278d221033a /src/gallium/auxiliary/vl
parentae56a1dd67040dc5d53f4a1622f775462f0fec05 (diff)
[g3dvl] move quantification into shaders
Diffstat (limited to 'src/gallium/auxiliary/vl')
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c107
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_decoder.c23
-rw-r--r--src/gallium/auxiliary/vl/vl_zscan.c138
-rw-r--r--src/gallium/auxiliary/vl/vl_zscan.h12
4 files changed, 168 insertions, 112 deletions
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index bf9b6cd6b11..e7fbc31ce24 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -55,7 +55,6 @@
#include <pipe/p_video_state.h>
#include "vl_vlc.h"
-#include "vl_zscan.h"
#include "vl_mpeg12_bitstream.h"
/* take num bits from the high part of bit_buf and zero extend them */
@@ -64,12 +63,6 @@
/* take num bits from the high part of bit_buf and sign extend them */
#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
-#define SATURATE(val) \
-do { \
- if ((uint32_t)(val + 2048) > 4095) \
- val = (val > 0) ? 2047 : -2048; \
-} while (0)
-
/* macroblock modes */
#define MACROBLOCK_INTRA 1
#define MACROBLOCK_PATTERN 2
@@ -721,7 +714,7 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
}
static inline void
-get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab *tab;
@@ -742,12 +735,10 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+ val = tab->level * quantizer_scale;
- /* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
- SATURATE (val);
dest[i] = val;
bs->vlc.buf <<= 1;
@@ -771,9 +762,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
- val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
+ val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
- SATURATE (val);
dest[i] = val;
vl_vlc_dumpbits(&bs->vlc, 12);
@@ -811,7 +801,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}
static inline void
-get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -831,12 +821,10 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+ val = tab->level * quantizer_scale;
- /* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
- SATURATE (val);
dest[i] = val;
bs->vlc.buf <<= 1;
@@ -859,9 +847,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
- val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
+ val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
- SATURATE (val);
dest[i] = val;
vl_vlc_dumpbits(&bs->vlc, 12);
@@ -900,7 +887,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}
static inline void
-get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab *tab;
@@ -927,12 +914,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
- val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
+ val = ((2*tab->level+1) * quantizer_scale) >> 1;
- /* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
- SATURATE (val);
dest[i] = val;
bs->vlc.buf <<= 1;
@@ -960,9 +945,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
- val = (val * quantizer_scale * quant_matrix[i]) / 32;
+ val = (val * quantizer_scale) / 2;
- SATURATE (val);
dest[i] = val;
vl_vlc_dumpbits(&bs->vlc, 12);
@@ -999,7 +983,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}
static inline void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -1020,7 +1004,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+ val = tab->level * quantizer_scale;
/* oddification */
val = (val - 1) | 1;
@@ -1028,7 +1012,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
- SATURATE (val);
dest[i] = val;
bs->vlc.buf <<= 1;
@@ -1057,12 +1040,11 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
vl_vlc_dumpbits(&bs->vlc, 8);
val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
}
- val = (val * quantizer_scale * quant_matrix[i]) / 16;
+ val = val * quantizer_scale;
/* oddification */
val = (val + ~SBITS (val, 1)) | 1;
- SATURATE (val);
dest[i] = val;
vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1099,7 +1081,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
}
static inline void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -1126,7 +1108,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
- val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
+ val = ((2*tab->level+1) * quantizer_scale) >> 1;
/* oddification */
val = (val - 1) | 1;
@@ -1134,7 +1116,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
- SATURATE (val);
dest[i] = val;
bs->vlc.buf <<= 1;
@@ -1167,12 +1148,11 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
}
val = 2 * (val + SBITS (val, 1)) + 1;
- val = (val * quantizer_scale * quant_matrix[i]) / 32;
+ val = (val * quantizer_scale) / 2;
/* oddification */
val = (val + ~SBITS (val, 1)) | 1;
- SATURATE (val);
dest[i] = val;
vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1209,7 +1189,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
}
static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
{
short dest[64];
@@ -1228,14 +1208,14 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
memset(dest, 0, sizeof(int16_t) * 64);
- dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
+ dest[0] = dc_dct_pred[cc];
if (picture->mpeg1) {
if (picture->picture_coding_type != D_TYPE)
- get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest);
+ get_mpeg1_intra_block(bs, quantizer_scale, dest);
} else if (picture->intra_vlc_format)
- get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest);
+ get_intra_block_B15(bs, quantizer_scale, dest);
else
- get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest);
+ get_intra_block_B14(bs, quantizer_scale, dest);
memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
@@ -1245,7 +1225,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
}
static inline void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale)
{
short dest[64];
@@ -1257,9 +1237,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
memset(dest, 0, sizeof(int16_t) * 64);
if (picture->mpeg1)
- get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
+ get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
else
- get_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
+ get_non_intra_block(bs, quantizer_scale, dest);
memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
@@ -1571,8 +1551,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
}
static inline bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
- const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64])
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
{
enum pipe_video_field_select default_field_select;
struct pipe_motionvector mv_fwd, mv_bwd;
@@ -1659,12 +1638,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
// unravaled loop of 6 block(i) calls in macroblock()
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
- slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+ slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
if (picture->picture_coding_type == D_TYPE) {
vl_vlc_needbits(&bs->vlc);
@@ -1722,17 +1701,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
// TODO optimize not fully used for idct accel only mc.
if (coded_block_pattern & 0x20)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0
+ slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0
if (coded_block_pattern & 0x10)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
+ slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
if (coded_block_pattern & 0x08)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
+ slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
if (coded_block_pattern & 0x04)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
+ slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
if (coded_block_pattern & 0x2)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
+ slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
if (coded_block_pattern & 0x1)
- slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
+ slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
}
dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
@@ -1845,12 +1824,6 @@ void
vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
{
- int intra_quantizer_matrix[64];
- int non_intra_quantizer_matrix[64];
-
- const int *scan;
- unsigned i;
-
assert(bs);
assert(num_ycbcr_blocks);
assert(buffer && num_bytes);
@@ -1859,11 +1832,5 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe
vl_vlc_init(&bs->vlc, buffer, num_bytes);
- scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
- for (i = 0; i < 64; ++i) {
- intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]];
- non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]];
- }
-
- while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix));
+ while(decode_slice(bs, picture));
}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index f96d7f0e2b3..ca790e7bc75 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -312,8 +312,21 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
} else {
- for (i = 0; i < VL_MAX_PLANES; ++i)
+ static const uint8_t dummy_quant[64] = {
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
+ };
+
+ for (i = 0; i < VL_MAX_PLANES; ++i) {
vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+ vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
+ }
}
}
@@ -365,6 +378,7 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
unsigned num_ycbcr_blocks[3])
{
struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+ uint8_t intra_quantizer_matrix[64];
struct vl_mpeg12_decoder *dec;
unsigned i;
@@ -373,8 +387,13 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
assert(dec);
- for (i = 0; i < VL_MAX_PLANES; ++i)
+ memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
+ intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);
+
+ for (i = 0; i < VL_MAX_PLANES; ++i) {
vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
+ vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
+ }
vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
}
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 0f468dfb5ab..4af3962209f 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -136,11 +136,11 @@ create_vert_shader(struct vl_zscan *zscan)
ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
- ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X),
+ ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(instance, TGSI_SWIZZLE_X),
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
- ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
+ ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));
for (i = 0; i < zscan->num_channels; ++i) {
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
@@ -149,7 +149,8 @@ create_vert_shader(struct vl_zscan *zscan)
ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
- ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
+ ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
+ ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
}
@@ -165,10 +166,10 @@ create_frag_shader(struct vl_zscan *zscan)
struct ureg_program *shader;
struct ureg_src vtex[zscan->num_channels];
- struct ureg_src src, scan, quant;
+ struct ureg_src samp_src, samp_scan, samp_quant;
struct ureg_dst tmp[zscan->num_channels];
- struct ureg_dst fragment;
+ struct ureg_dst quant, fragment;
unsigned i;
@@ -179,12 +180,13 @@ create_frag_shader(struct vl_zscan *zscan)
for (i = 0; i < zscan->num_channels; ++i)
vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
- src = ureg_DECL_sampler(shader, 0);
- scan = ureg_DECL_sampler(shader, 1);
- quant = ureg_DECL_sampler(shader, 2);
+ samp_src = ureg_DECL_sampler(shader, 0);
+ samp_scan = ureg_DECL_sampler(shader, 1);
+ samp_quant = ureg_DECL_sampler(shader, 2);
for (i = 0; i < zscan->num_channels; ++i)
tmp[i] = ureg_DECL_temporary(shader);
+ quant = ureg_DECL_temporary(shader);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
@@ -194,17 +196,18 @@ create_frag_shader(struct vl_zscan *zscan)
* fragment = tex(tmp, 0) * quant
*/
for (i = 0; i < zscan->num_channels; ++i)
- ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
+ ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);
for (i = 0; i < zscan->num_channels; ++i)
- ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
+ ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));
- for (i = 0; i < zscan->num_channels; ++i)
- ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
+ for (i = 0; i < zscan->num_channels; ++i) {
+ ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
+ ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
+ }
- // TODO: Fetch quant and use it
- for (i = 0; i < zscan->num_channels; ++i)
- ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
+ ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
+ ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));
for (i = 0; i < zscan->num_channels; ++i)
ureg_release_temporary(shader, tmp[i]);
@@ -283,7 +286,7 @@ init_state(struct vl_zscan *zscan)
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
- sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
@@ -413,15 +416,6 @@ error_resource:
return NULL;
}
-#if 0
-// TODO
-struct pipe_sampler_view *
-vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
-
-struct pipe_sampler_view *
-vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
-#endif
-
bool
vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
unsigned buffer_width, unsigned buffer_height,
@@ -457,16 +451,13 @@ vl_zscan_cleanup(struct vl_zscan *zscan)
cleanup_state(zscan);
}
-#if 0
-// TODO
-void
-vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
-#endif
-
bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst)
{
+ struct pipe_resource res_tmpl, *res;
+ struct pipe_sampler_view sv_tmpl;
+
assert(zscan && buffer);
memset(buffer, 0, sizeof(struct vl_zscan_buffer));
@@ -489,6 +480,28 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
buffer->fb_state.nr_cbufs = 1;
pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
+ memset(&res_tmpl, 0, sizeof(res_tmpl));
+ res_tmpl.target = PIPE_TEXTURE_3D;
+ res_tmpl.format = PIPE_FORMAT_R8_UNORM;
+ res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line;
+ res_tmpl.height0 = BLOCK_HEIGHT;
+ res_tmpl.depth0 = 2;
+ res_tmpl.array_size = 1;
+ res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+ res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+ res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
+ if (!res)
+ return false;
+
+ memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+ u_sampler_view_default_template(&sv_tmpl, res, res->format);
+ sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
+ buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
+ pipe_resource_reference(&res, NULL);
+ if (!buffer->quant)
+ return false;
+
return true;
}
@@ -513,6 +526,65 @@ vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *la
}
void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+ const uint8_t intra_matrix[64],
+ const uint8_t non_intra_matrix[64])
+{
+ struct pipe_context *pipe;
+ struct pipe_transfer *buf_transfer;
+ unsigned x, y, i, pitch;
+ uint8_t *intra, *non_intra;
+
+ struct pipe_box rect =
+ {
+ 0, 0, 0,
+ BLOCK_WIDTH,
+ BLOCK_HEIGHT,
+ 2
+ };
+
+ assert(buffer);
+ assert(intra_matrix);
+ assert(non_intra_matrix);
+
+ pipe = buffer->zscan->pipe;
+
+ rect.width *= buffer->zscan->blocks_per_line;
+
+ buf_transfer = pipe->get_transfer
+ (
+ pipe, buffer->quant->texture,
+ 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &rect
+ );
+ if (!buf_transfer)
+ goto error_transfer;
+
+ pitch = buf_transfer->stride;
+
+ non_intra = pipe->transfer_map(pipe, buf_transfer);
+ if (!non_intra)
+ goto error_map;
+
+ intra = non_intra + BLOCK_HEIGHT * pitch;
+
+ for (i = 0; i < buffer->zscan->blocks_per_line; ++i)
+ for (y = 0; y < BLOCK_HEIGHT; ++y)
+ for (x = 0; x < BLOCK_WIDTH; ++x) {
+ intra[i * BLOCK_WIDTH + y * pitch + x] = intra_matrix[x + y * BLOCK_WIDTH];
+ non_intra[i * BLOCK_WIDTH + y * pitch + x] = non_intra_matrix[x + y * BLOCK_WIDTH];
+ }
+
+ pipe->transfer_unmap(pipe, buf_transfer);
+
+error_map:
+ pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+ return;
+}
+
+void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
{
struct vl_zscan *zscan;
@@ -523,10 +595,10 @@ vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
- zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
+ zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers);
zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
- zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
+ zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, &buffer->src);
zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index ccc6bc4a8a1..be12b8e873a 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -53,8 +53,6 @@ struct vl_zscan
void *samplers[3];
void *vs, *fs;
-
- struct pipe_sampler_view *quant;
};
struct vl_zscan_buffer
@@ -84,11 +82,6 @@ vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
void
vl_zscan_cleanup(struct vl_zscan *zscan);
-#if 0
-void
-vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
-#endif
-
bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst);
@@ -100,6 +93,11 @@ void
vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout);
void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+ const uint8_t intra_matrix[64],
+ const uint8_t non_intra_matrix[64]);
+
+void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
#endif