diff options
Diffstat (limited to 'src/gallium/auxiliary')
29 files changed, 8272 insertions, 14 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index d10f9cd49cb..7dae7bc908b 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -147,14 +147,17 @@ C_SOURCES = \ util/u_transfer.c \ util/u_resource.c \ util/u_upload_mgr.c \ - util/u_vbuf_mgr.c - - # Disabling until pipe-video branch gets merged in - #vl/vl_bitstream_parser.c \ - #vl/vl_mpeg12_mc_renderer.c \ - #vl/vl_compositor.c \ - #vl/vl_csc.c \ - #vl/vl_shader_build.c \ + util/u_vbuf_mgr.c \ + vl/vl_csc.c \ + vl/vl_compositor.c \ + vl/vl_decoder.c \ + vl/vl_mpeg12_decoder.c \ + vl/vl_mpeg12_bitstream.c \ + vl/vl_zscan.c \ + vl/vl_idct.c \ + vl/vl_mc.c \ + vl/vl_vertex_buffers.c \ + vl/vl_video_buffer.c GALLIVM_SOURCES = \ gallivm/lp_bld_arit.c \ @@ -225,3 +228,4 @@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_forma util/u_half.c: util/u_half.py $(PYTHON2) util/u_half.py > $@ +# DO NOT DELETE diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 457911d2f1f..d18f55f1644 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -195,12 +195,16 @@ source = [ 'util/u_transfer.c', 'util/u_upload_mgr.c', 'util/u_vbuf_mgr.c', - # Disabling until pipe-video branch gets merged in - #'vl/vl_bitstream_parser.c', - #'vl/vl_mpeg12_mc_renderer.c', - #'vl/vl_compositor.c', - #'vl/vl_csc.c', - #'vl/vl_shader_build.c', + 'vl/vl_csc.c', + 'vl/vl_compositor.c', + 'vl/vl_decoder.c', + 'vl/vl_mpeg12_decoder.c', + 'vl/vl_mpeg12_bitstream.c', + 'vl/vl_zscan.c', + 'vl/vl_idct.c', + 'vl/vl_mc.c', + 'vl/vl_vertex_buffers.c', + 'vl/vl_video_buffer.c', ] if env['llvm']: diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index a8baad111f1..347e2beb8dd 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -259,3 +259,11 @@ PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, r PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, rgb # A.k.a. D3DDECLTYPE_DEC3N PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb + +PIPE_FORMAT_YV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV16 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IYUV , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV21 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IA44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_AI44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv diff --git a/src/gallium/auxiliary/util/u_format_yuv.c b/src/gallium/auxiliary/util/u_format_yuv.c index ab8bf29c97b..64ea0b35347 100644 --- a/src/gallium/auxiliary/util/u_format_yuv.c +++ b/src/gallium/auxiliary/util/u_format_yuv.c @@ -1045,3 +1045,138 @@ util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src, dst[3] = 1.0f; } + +/* XXX: Stubbed for now */ +void +util_format_yv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_yv16_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_iyuv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_nv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_nv21_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} diff --git a/src/gallium/auxiliary/util/u_format_yuv.h b/src/gallium/auxiliary/util/u_format_yuv.h index dc9632346d1..9f2365a5266 100644 --- a/src/gallium/auxiliary/util/u_format_yuv.h +++ b/src/gallium/auxiliary/util/u_format_yuv.h @@ -169,6 +169,141 @@ void util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); +/* XXX: Stubbed for now */ +void +util_format_yv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_yv16_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv16_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv16_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv16_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_yv16_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_iyuv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_iyuv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_iyuv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_iyuv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_iyuv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_nv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_nv21_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv21_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv21_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); +void +util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); +void +util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + void util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h new file mode 100644 index 00000000000..78cceb6bcf2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_video.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_VIDEO_H +#define U_VIDEO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <pipe/p_defines.h> + +/* u_reduce_video_profile() needs these */ +#include <pipe/p_compiler.h> +#include <util/u_debug.h> + +static INLINE enum pipe_video_codec +u_reduce_video_profile(enum pipe_video_profile profile) +{ + switch (profile) + { + case PIPE_VIDEO_PROFILE_MPEG1: + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return PIPE_VIDEO_CODEC_MPEG12; + + case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE: + return PIPE_VIDEO_CODEC_MPEG4; + + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + case PIPE_VIDEO_PROFILE_VC1_MAIN: + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + return PIPE_VIDEO_CODEC_VC1; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return PIPE_VIDEO_CODEC_MPEG4_AVC; + + default: + assert(0); + return PIPE_VIDEO_CODEC_UNKNOWN; + } +} + +#endif /* U_VIDEO_H */ diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c new file mode 100644 index 00000000000..78b8d0627ce --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -0,0 +1,734 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_context.h> + +#include <util/u_memory.h> +#include <util/u_draw.h> +#include <util/u_surface.h> + +#include <tgsi/tgsi_ureg.h> + +#include "vl_csc.h" +#include "vl_types.h" +#include "vl_compositor.h" + +typedef float csc_matrix[16]; + +static void * +create_vert_shader(struct vl_compositor *c) +{ + struct ureg_program *shader; + struct ureg_src vpos, vtex; + struct ureg_dst o_vpos, o_vtex; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return false; + + vpos = ureg_DECL_vs_input(shader, 0); + vtex = ureg_DECL_vs_input(shader, 1); + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0); + o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1); + + /* + * o_vpos = vpos + * o_vtex = vtex + */ + ureg_MOV(shader, o_vpos, vpos); + ureg_MOV(shader, o_vtex, vtex); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, c->pipe); +} + +static void * +create_frag_shader_video_buffer(struct vl_compositor *c) +{ + struct ureg_program *shader; + struct ureg_src tc; + struct ureg_src csc[3]; + struct ureg_src sampler[3]; + struct ureg_dst texel; + struct ureg_dst fragment; + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return false; + + tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR); + for (i = 0; i < 3; ++i) { + csc[i] = ureg_DECL_constant(shader, i); + sampler[i] = ureg_DECL_sampler(shader, i); + } + texel = ureg_DECL_temporary(shader); + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + /* + * texel.xyz = tex(tc, sampler[i]) + * fragment = csc * texel + */ + for (i = 0; i < 3; ++i) + ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, tc, sampler[i]); + + ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); + + for (i = 0; i < 3; ++i) + ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel)); + + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); + + ureg_release_temporary(shader, texel); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, c->pipe); +} + +static void * +create_frag_shader_palette(struct vl_compositor *c) +{ + struct ureg_program *shader; + struct ureg_src csc[3]; + struct ureg_src tc; + struct ureg_src sampler; + struct ureg_src palette; + struct ureg_dst texel; + struct ureg_dst fragment; + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return false; + + for (i = 0; i < 3; ++i) + csc[i] = ureg_DECL_constant(shader, i); + + tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR); + sampler = ureg_DECL_sampler(shader, 0); + palette = ureg_DECL_sampler(shader, 1); + texel = ureg_DECL_temporary(shader); + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + /* + * texel = tex(tc, sampler) + * fragment.xyz = tex(texel, palette) * csc + * fragment.a = texel.a + */ + ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler); + ureg_MUL(shader, ureg_writemask(texel, TGSI_WRITEMASK_X), ureg_src(texel), ureg_imm1f(shader, 15.0f / 16.0f)); + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel)); + + ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette); + + for (i = 0; i < 3; ++i) + ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel)); + + ureg_release_temporary(shader, texel); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, c->pipe); +} + +static void * +create_frag_shader_rgba(struct vl_compositor *c) +{ + struct ureg_program *shader; + struct ureg_src tc; + struct ureg_src sampler; + struct ureg_dst fragment; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return false; + + tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR); + sampler = ureg_DECL_sampler(shader, 0); + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + /* + * fragment = tex(tc, sampler) + */ + ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, c->pipe); +} + +static bool +init_shaders(struct vl_compositor *c) +{ + assert(c); + + c->vs = create_vert_shader(c); + if (!c->vs) { + debug_printf("Unable to create vertex shader.\n"); + return false; + } + + c->fs_video_buffer = create_frag_shader_video_buffer(c); + if (!c->fs_video_buffer) { + debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n"); + return false; + } + + c->fs_palette = create_frag_shader_palette(c); + if (!c->fs_palette) { + debug_printf("Unable to create Palette-to-RGB fragment shader.\n"); + return false; + } + + c->fs_rgba = create_frag_shader_rgba(c); + if (!c->fs_rgba) { + debug_printf("Unable to create RGB-to-RGB fragment shader.\n"); + return false; + } + + return true; +} + +static void cleanup_shaders(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_vs_state(c->pipe, c->vs); + c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer); + c->pipe->delete_fs_state(c->pipe, c->fs_palette); + c->pipe->delete_fs_state(c->pipe, c->fs_rgba); +} + +static bool +init_pipe_state(struct vl_compositor *c) +{ + struct pipe_rasterizer_state rast; + struct pipe_sampler_state sampler; + struct pipe_blend_state blend; + + assert(c); + + c->fb_state.nr_cbufs = 1; + c->fb_state.zsbuf = NULL; + + c->viewport.scale[2] = 1; + c->viewport.scale[3] = 1; + c->viewport.translate[0] = 0; + c->viewport.translate[1] = 0; + c->viewport.translate[2] = 0; + c->viewport.translate[3] = 0; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + + c->sampler_linear = c->pipe->create_sampler_state(c->pipe, &sampler); + + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + c->sampler_nearest = c->pipe->create_sampler_state(c->pipe, &sampler); + + memset(&blend, 0, sizeof blend); + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.dither = 0; + c->blend = c->pipe->create_blend_state(c->pipe, &blend); + + memset(&rast, 0, sizeof rast); + rast.flatshade = 1; + rast.front_ccw = 1; + rast.cull_face = PIPE_FACE_NONE; + rast.fill_back = PIPE_POLYGON_MODE_FILL; + rast.fill_front = PIPE_POLYGON_MODE_FILL; + rast.scissor = 1; + rast.line_width = 1; + rast.point_size_per_vertex = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + rast.gl_rasterization_rules = 1; + + c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast); + + return true; +} + +static void cleanup_pipe_state(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_sampler_state(c->pipe, c->sampler_linear); + c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest); + c->pipe->delete_blend_state(c->pipe, c->blend); + c->pipe->delete_rasterizer_state(c->pipe, c->rast); +} + +static bool +create_vertex_buffer(struct vl_compositor *c) +{ + assert(c); + + pipe_resource_reference(&c->vertex_buf.buffer, NULL); + c->vertex_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + sizeof(struct vertex4f) * VL_COMPOSITOR_MAX_LAYERS * 4 + ); + return c->vertex_buf.buffer != NULL; +} + +static bool +init_buffers(struct vl_compositor *c) +{ + struct pipe_vertex_element vertex_elems[2]; + + assert(c); + + /* + * Create our vertex buffer and vertex buffer elements + */ + c->vertex_buf.stride = sizeof(struct vertex4f); + c->vertex_buf.buffer_offset = 0; + create_vertex_buffer(c); + + vertex_elems[0].src_offset = 0; + vertex_elems[0].instance_divisor = 0; + vertex_elems[0].vertex_buffer_index = 0; + vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + vertex_elems[1].src_offset = sizeof(struct vertex2f); + vertex_elems[1].instance_divisor = 0; + vertex_elems[1].vertex_buffer_index = 0; + vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems); + + /* + * Create our fragment shader's constant buffer + * Const buffer contains the color conversion matrix and bias vectors + */ + /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */ + c->csc_matrix = pipe_buffer_create + ( + c->pipe->screen, + PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, + sizeof(csc_matrix) + ); + + return true; +} + +static void +cleanup_buffers(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state); + pipe_resource_reference(&c->vertex_buf.buffer, NULL); + pipe_resource_reference(&c->csc_matrix, NULL); +} + +static inline struct pipe_video_rect +default_rect(struct vl_compositor_layer *layer) +{ + struct pipe_resource *res = layer->sampler_views[0]->texture; + struct pipe_video_rect rect = { 0, 0, res->width0, res->height0 }; + return rect; +} + +static inline struct vertex2f +calc_topleft(struct vertex2f size, struct pipe_video_rect rect) +{ + struct vertex2f res = { rect.x / size.x, rect.y / size.y }; + return res; +} + +static inline struct vertex2f +calc_bottomright(struct vertex2f size, struct pipe_video_rect rect) +{ + struct vertex2f res = { (rect.x + rect.w) / size.x, (rect.y + rect.h) / size.y }; + return res; +} + +static inline void +calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height, + struct pipe_video_rect src, struct pipe_video_rect dst) +{ + struct vertex2f size = { width, height }; + + layer->src.tl = calc_topleft(size, src); + layer->src.br = calc_bottomright(size, src); + layer->dst.tl = calc_topleft(size, dst); + layer->dst.br = calc_bottomright(size, dst); +} + +static void +gen_rect_verts(struct vertex4f *vb, struct vl_compositor_layer *layer) +{ + assert(vb && layer); + + vb[0].x = layer->dst.tl.x; + vb[0].y = layer->dst.tl.y; + vb[0].z = layer->src.tl.x; + vb[0].w = layer->src.tl.y; + + vb[1].x = layer->dst.br.x; + vb[1].y = layer->dst.tl.y; + vb[1].z = layer->src.br.x; + vb[1].w = layer->src.tl.y; + + vb[2].x = layer->dst.br.x; + vb[2].y = layer->dst.br.y; + vb[2].z = layer->src.br.x; + vb[2].w = layer->src.br.y; + + vb[3].x = layer->dst.tl.x; + vb[3].y = layer->dst.br.y; + vb[3].z = layer->src.tl.x; + vb[3].w = layer->src.br.y; +} + +static void +gen_vertex_data(struct vl_compositor *c) +{ + struct vertex4f *vb; + struct pipe_transfer *buf_transfer; + unsigned i; + + assert(c); + + vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK, + &buf_transfer); + + if (!vb) { + // If buffer is still locked from last draw create a new one + create_vertex_buffer(c); + vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer); + } + + for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) { + if (c->used_layers & (1 << i)) { + struct vl_compositor_layer *layer = &c->layers[i]; + gen_rect_verts(vb, layer); + vb += 4; + + if (layer->clearing && + c->dirty_tl.x >= layer->dst.tl.x && + c->dirty_tl.y >= layer->dst.tl.y && + c->dirty_br.x <= layer->dst.br.x && + c->dirty_br.y <= layer->dst.br.y) { + + // We clear the dirty area anyway, no need for clear_render_target + c->dirty_tl.x = c->dirty_tl.y = 1.0f; + c->dirty_br.x = c->dirty_br.y = 0.0f; + } + } + } + + pipe_buffer_unmap(c->pipe, buf_transfer); +} + +static void +draw_layers(struct vl_compositor *c) +{ + unsigned vb_index, i; + + assert(c); + + for (i = 0, vb_index = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) { + if (c->used_layers & (1 << i)) { + struct vl_compositor_layer *layer = &c->layers[i]; + struct pipe_sampler_view **samplers = &layer->sampler_views[0]; + unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3; + + c->pipe->bind_fs_state(c->pipe, layer->fs); + c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, layer->samplers); + c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers); + util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4); + vb_index++; + + // Remember the currently drawn area as dirty for the next draw command + c->dirty_tl.x = MIN2(layer->dst.tl.x, c->dirty_tl.x); + c->dirty_tl.y = MIN2(layer->dst.tl.y, c->dirty_tl.y); + c->dirty_br.x = MAX2(layer->dst.br.x, c->dirty_br.x); + c->dirty_br.y = MAX2(layer->dst.br.y, c->dirty_br.y); + } + } +} + +void +vl_compositor_reset_dirty_area(struct vl_compositor *c) +{ + assert(c); + + c->dirty_tl.x = c->dirty_tl.y = 0.0f; + c->dirty_br.x = c->dirty_br.y = 1.0f; +} + +void +vl_compositor_set_clear_color(struct vl_compositor *c, float color[4]) +{ + unsigned i; + + assert(c); + + for (i = 0; i < 4; ++i) + c->clear_color[i] = color[i]; +} + +void +vl_compositor_clear_layers(struct vl_compositor *c) +{ + unsigned i, j; + + assert(c); + + c->used_layers = 0; + for ( i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) { + c->layers[i].fs = NULL; + for ( j = 0; j < 3; j++) + pipe_sampler_view_reference(&c->layers[i].sampler_views[j], NULL); + } +} + +void +vl_compositor_cleanup(struct vl_compositor *c) +{ + assert(c); + + vl_compositor_clear_layers(c); + + cleanup_buffers(c); + cleanup_shaders(c); + cleanup_pipe_state(c); +} + +void +vl_compositor_set_csc_matrix(struct vl_compositor *c, const float matrix[16]) +{ + struct pipe_transfer *buf_transfer; + + assert(c); + + memcpy + ( + pipe_buffer_map(c->pipe, c->csc_matrix, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer), + matrix, + sizeof(csc_matrix) + ); + + pipe_buffer_unmap(c->pipe, buf_transfer); +} + +void +vl_compositor_set_buffer_layer(struct vl_compositor *c, + unsigned layer, + struct pipe_video_buffer *buffer, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect) +{ + struct pipe_sampler_view **sampler_views; + unsigned i; + + assert(c && buffer); + + assert(layer < VL_COMPOSITOR_MAX_LAYERS); + + c->used_layers |= 1 << layer; + c->layers[layer].clearing = true; + c->layers[layer].fs = c->fs_video_buffer; + + sampler_views = buffer->get_sampler_view_components(buffer); + for (i = 0; i < 3; ++i) { + c->layers[layer].samplers[i] = c->sampler_linear; + pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]); + } + + calc_src_and_dst(&c->layers[layer], buffer->width, buffer->height, + src_rect ? *src_rect : default_rect(&c->layers[layer]), + dst_rect ? *dst_rect : default_rect(&c->layers[layer])); +} + +void +vl_compositor_set_palette_layer(struct vl_compositor *c, + unsigned layer, + struct pipe_sampler_view *indexes, + struct pipe_sampler_view *palette, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect) +{ + assert(c && indexes && palette); + + assert(layer < VL_COMPOSITOR_MAX_LAYERS); + + c->used_layers |= 1 << layer; + c->layers[layer].clearing = false; + c->layers[layer].fs = c->fs_palette; + c->layers[layer].samplers[0] = c->sampler_linear; + c->layers[layer].samplers[1] = c->sampler_nearest; + c->layers[layer].samplers[2] = NULL; + pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], indexes); + pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], palette); + pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL); + calc_src_and_dst(&c->layers[layer], indexes->texture->width0, indexes->texture->height0, + src_rect ? *src_rect : default_rect(&c->layers[layer]), + dst_rect ? *dst_rect : default_rect(&c->layers[layer])); + +} + +void +vl_compositor_set_rgba_layer(struct vl_compositor *c, + unsigned layer, + struct pipe_sampler_view *rgba, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect) +{ + assert(c && rgba); + + assert(layer < VL_COMPOSITOR_MAX_LAYERS); + + c->used_layers |= 1 << layer; + c->layers[layer].clearing = rgba->swizzle_a == PIPE_SWIZZLE_ONE; + c->layers[layer].fs = c->fs_rgba; + c->layers[layer].samplers[0] = c->sampler_linear; + c->layers[layer].samplers[1] = NULL; + c->layers[layer].samplers[2] = NULL; + pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], rgba); + pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], NULL); + pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL); + calc_src_and_dst(&c->layers[layer], rgba->texture->width0, rgba->texture->height0, + src_rect ? *src_rect : default_rect(&c->layers[layer]), + dst_rect ? *dst_rect : default_rect(&c->layers[layer])); +} + +void +vl_compositor_render(struct vl_compositor *c, + enum pipe_mpeg12_picture_type picture_type, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + struct pipe_fence_handle **fence) +{ + struct pipe_scissor_state scissor; + + assert(c); + assert(dst_surface); + + c->fb_state.width = dst_surface->width; + c->fb_state.height = dst_surface->height; + c->fb_state.cbufs[0] = dst_surface; + + c->viewport.scale[0] = dst_surface->width; + c->viewport.scale[1] = dst_surface->height; + + if (dst_area) { + scissor.minx = dst_area->x; + scissor.miny = dst_area->y; + scissor.maxx = dst_area->x + dst_area->w; + scissor.maxy = dst_area->y + dst_area->h; + } else { + scissor.minx = 0; + scissor.miny = 0; + scissor.maxx = dst_surface->width; + scissor.maxy = dst_surface->height; + } + + gen_vertex_data(c); + + if (c->dirty_tl.x < c->dirty_br.x || c->dirty_tl.y < c->dirty_br.y) { + util_clear_render_target(c->pipe, dst_surface, c->clear_color, 0, 0, dst_surface->width, dst_surface->height); + c->dirty_tl.x = c->dirty_tl.y = 1.0f; + c->dirty_br.x = c->dirty_br.y = 0.0f; + } + + c->pipe->set_scissor_state(c->pipe, &scissor); + c->pipe->set_framebuffer_state(c->pipe, &c->fb_state); + c->pipe->set_viewport_state(c->pipe, &c->viewport); + c->pipe->bind_vs_state(c->pipe, c->vs); + c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf); + c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state); + c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, c->csc_matrix); + c->pipe->bind_blend_state(c->pipe, c->blend); + c->pipe->bind_rasterizer_state(c->pipe, c->rast); + + draw_layers(c); + + c->pipe->flush(c->pipe, fence); +} + +bool +vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe) +{ + csc_matrix csc_matrix; + + c->pipe = pipe; + + if (!init_pipe_state(c)) + return false; + + if (!init_shaders(c)) { + cleanup_pipe_state(c); + return false; + } + if (!init_buffers(c)) { + cleanup_shaders(c); + cleanup_pipe_state(c); + return false; + } + + vl_compositor_clear_layers(c); + + vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix); + vl_compositor_set_csc_matrix(c, csc_matrix); + + c->clear_color[0] = c->clear_color[1] = 0.0f; + c->clear_color[2] = c->clear_color[3] = 0.0f; + vl_compositor_reset_dirty_area(c); + + return true; +} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h new file mode 100644 index 00000000000..df662db4d91 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -0,0 +1,169 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_compositor_h +#define vl_compositor_h + +#include <pipe/p_state.h> +#include <pipe/p_video_decoder.h> +#include <pipe/p_video_state.h> + +#include "vl_types.h" + +struct pipe_context; + +/** + * composing and displaying of image data + */ + +#define VL_COMPOSITOR_MAX_LAYERS 16 + +struct vl_compositor_layer +{ + bool clearing; + + void *fs; + void *samplers[3]; + + struct pipe_sampler_view *sampler_views[3]; + struct { + struct vertex2f tl, br; + } src, dst; +}; + +struct vl_compositor +{ + struct pipe_context *pipe; + + struct pipe_framebuffer_state fb_state; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buf; + struct pipe_resource *csc_matrix; + + void *sampler_linear; + void *sampler_nearest; + void *blend; + void *rast; + void *vertex_elems_state; + + void *vs; + void *fs_video_buffer; + void *fs_palette; + void *fs_rgba; + + float clear_color[4]; + struct vertex2f dirty_tl, dirty_br; + + unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS; + struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS]; +}; + +/** + * initialize this compositor + */ +bool +vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); + +/** + * set yuv -> rgba conversion matrix + */ +void +vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float mat[16]); + +/** + * reset dirty area, so it's cleared with the clear colour + */ +void +vl_compositor_reset_dirty_area(struct vl_compositor *compositor); + +/** + * set the clear color + */ +void +vl_compositor_set_clear_color(struct vl_compositor *compositor, float color[4]); + +/** + * set overlay samplers + */ +/*@{*/ + +/** + * reset all currently set layers + */ +void +vl_compositor_clear_layers(struct vl_compositor *compositor); + +/** + * set a video buffer as a layer to render + */ +void +vl_compositor_set_buffer_layer(struct vl_compositor *compositor, + unsigned layer, + struct pipe_video_buffer *buffer, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect); + +/** + * set a paletted sampler as a layer to render + */ +void +vl_compositor_set_palette_layer(struct vl_compositor *compositor, + unsigned layer, + struct pipe_sampler_view *indexes, + struct pipe_sampler_view *palette, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect); + +/** + * set a rgba sampler as a layer to render + */ +void +vl_compositor_set_rgba_layer(struct vl_compositor *compositor, + unsigned layer, + struct pipe_sampler_view *rgba, + struct pipe_video_rect *src_rect, + struct pipe_video_rect *dst_rect); + +/*@}*/ + +/** + * render the layers to the frontbuffer + */ +void +vl_compositor_render(struct vl_compositor *compositor, + enum pipe_mpeg12_picture_type picture_type, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + struct pipe_fence_handle **fence); + +/** +* destroy this compositor +*/ +void +vl_compositor_cleanup(struct vl_compositor *compositor); + +#endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c new file mode 100644 index 00000000000..00eefa293a4 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <util/u_math.h> +#include <util/u_debug.h> + +#include "vl_csc.h" + +/* + * Color space conversion formulas + * + * To convert YCbCr to RGB, + * vec4 ycbcr, rgb + * mat44 csc + * rgb = csc * ycbcr + * + * To calculate the color space conversion matrix csc with ProcAmp adjustments, + * mat44 csc, cstd, procamp, bias + * csc = cstd * (procamp * bias) + * + * Where cstd is a matrix corresponding to one of the color standards (BT.601, BT.709, etc) + * adjusted for the kind of YCbCr -> RGB mapping wanted (1:1, full), + * bias is a matrix corresponding to the kind of YCbCr -> RGB mapping wanted (1:1, full) + * + * To calculate procamp, + * mat44 procamp, hue, saturation, brightness, contrast + * procamp = brightness * (saturation * (contrast * hue)) + * Alternatively, + * procamp = saturation * (brightness * (contrast * hue)) + * + * contrast + * [ c, 0, 0, 0] + * [ 0, c, 0, 0] + * [ 0, 0, c, 0] + * [ 0, 0, 0, 1] + * + * brightness + * [ 1, 0, 0, b] + * [ 0, 1, 0, 0] + * [ 0, 0, 1, 0] + * [ 0, 0, 0, 1] + * + * saturation + * [ 1, 0, 0, 0] + * [ 0, s, 0, 0] + * [ 0, 0, s, 0] + * [ 0, 0, 0, 1] + * + * hue + * [ 1, 0, 0, 0] + * [ 0, cos(h), sin(h), 0] + * [ 0, -sin(h), cos(h), 0] + * [ 0, 0, 0, 1] + * + * procamp + * [ c, 0, 0, b] + * [ 0, c*s*cos(h), c*s*sin(h), 0] + * [ 0, -c*s*sin(h), c*s*cos(h), 0] + * [ 0, 0, 0, 1] + * + * bias + * [ 1, 0, 0, ybias] + * [ 0, 1, 0, cbbias] + * [ 0, 0, 1, crbias] + * [ 0, 0, 0, 1] + * + * csc + * [ c*cstd[ 0], c*cstd[ 1]*s*cos(h) - c*cstd[ 2]*s*sin(h), c*cstd[ 2]*s*cos(h) + c*cstd[ 1]*s*sin(h), cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 2]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[ 4], c*cstd[ 5]*s*cos(h) - c*cstd[ 6]*s*sin(h), c*cstd[ 6]*s*cos(h) + c*cstd[ 5]*s*sin(h), cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 6]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[ 8], c*cstd[ 9]*s*cos(h) - c*cstd[10]*s*sin(h), c*cstd[10]*s*cos(h) + c*cstd[ 9]*s*sin(h), cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[10]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[12], c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h), c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h), cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + */ + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_601[16] = +{ + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_601_full[16] = +{ + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_709[16] = +{ + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_709_full[16] = +{ + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +static const float identity[16] = +{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +const struct vl_procamp vl_default_procamp = { + .contrast = 1.0f, + .saturation = 1.0f, + .brightness = 0.0f, + .hue = 0.0f +}; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + float *matrix) +{ + float ybias = full_range ? -16.0f/255.0f : 0.0f; + float cbbias = -128.0f/255.0f; + float crbias = -128.0f/255.0f; + + const struct vl_procamp *p = procamp ? procamp : &vl_default_procamp; + float c = p->contrast; + float s = p->saturation; + float b = p->brightness; + float h = p->hue; + + const float *cstd; + + assert(matrix); + + switch (cs) { + case VL_CSC_COLOR_STANDARD_BT_601: + cstd = full_range ? &bt_601_full[0] : &bt_601[0]; + break; + case VL_CSC_COLOR_STANDARD_BT_709: + cstd = full_range ? &bt_709_full[0] : &bt_709[0]; + break; + case VL_CSC_COLOR_STANDARD_IDENTITY: + default: + assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY); + memcpy(matrix, &identity[0], sizeof(float) * 16); + return; + } + + matrix[ 0] = c*cstd[ 0]; + matrix[ 1] = c*cstd[ 1]*s*cosf(h) - c*cstd[ 2]*s*sinf(h); + matrix[ 2] = c*cstd[ 2]*s*cosf(h) + c*cstd[ 1]*s*sinf(h); + matrix[ 3] = cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 2]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[ 4] = c*cstd[ 4]; + matrix[ 5] = c*cstd[ 5]*s*cosf(h) - c*cstd[ 6]*s*sinf(h); + matrix[ 6] = c*cstd[ 6]*s*cosf(h) + c*cstd[ 5]*s*sinf(h); + matrix[ 7] = cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 6]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[ 8] = c*cstd[ 8]; + matrix[ 9] = c*cstd[ 9]*s*cosf(h) - c*cstd[10]*s*sinf(h); + matrix[10] = c*cstd[10]*s*cosf(h) + c*cstd[ 9]*s*sinf(h); + matrix[11] = cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[10]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[12] = c*cstd[12]; + matrix[13] = c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h); + matrix[14] = c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h); + matrix[15] = cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h)); +} diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h new file mode 100644 index 00000000000..9b73fb3aef2 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_csc_h +#define vl_csc_h + +#include <pipe/p_compiler.h> + +struct vl_procamp +{ + float brightness; + float contrast; + float saturation; + float hue; +}; + +enum VL_CSC_COLOR_STANDARD +{ + VL_CSC_COLOR_STANDARD_IDENTITY, + VL_CSC_COLOR_STANDARD_BT_601, + VL_CSC_COLOR_STANDARD_BT_709 +}; + +extern const struct vl_procamp vl_default_procamp; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + float *matrix); + +#endif /* vl_csc_h */ diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c new file mode 100644 index 00000000000..fac03359a0f --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_decoder.c @@ -0,0 +1,77 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <pipe/p_video_decoder.h> + +#include <util/u_video.h> + +#include "vl_decoder.h" +#include "vl_mpeg12_decoder.h" + +bool +vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile) +{ + assert(screen); + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return true; + default: + return false; + } +} + +struct pipe_video_decoder * +vl_create_decoder(struct pipe_context *pipe, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + unsigned buffer_width, buffer_height; + bool pot_buffers; + + assert(pipe); + assert(width > 0 && height > 0); + + pot_buffers = !pipe->screen->get_video_param + ( + pipe->screen, + profile, + PIPE_VIDEO_CAP_NPOT_TEXTURES + ); + + buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH); + buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT); + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return vl_create_mpeg12_decoder(pipe, profile, entrypoint, chroma_format, buffer_width, buffer_height); + default: + return NULL; + } + return NULL; +} diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h new file mode 100644 index 00000000000..0e9280dbfa2 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_decoder.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_decoder_h +#define vl_decoder_h + +#include <pipe/p_video_decoder.h> + +/** + * check if a given profile is supported with shader based decoding + */ +bool +vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile); + +/** + * standard implementation of pipe->create_video_decoder + */ +struct pipe_video_decoder * +vl_create_decoder(struct pipe_context *pipe, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); + +#endif /* vl_decoder_h */ diff --git a/src/gallium/auxiliary/vl/vl_defines.h b/src/gallium/auxiliary/vl/vl_defines.h new file mode 100644 index 00000000000..7568db027e6 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_defines.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2011 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_defines_h +#define vl_defines_h + +/* constants usually used with all known codecs */ +#define MACROBLOCK_WIDTH 16 +#define MACROBLOCK_HEIGHT 16 + +#define BLOCK_WIDTH 8 +#define BLOCK_HEIGHT 8 + +#define VL_MAX_PLANES 3 +#define VL_MAX_REF_FRAMES 2 + +#endif diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c new file mode 100644 index 00000000000..75e76c09f63 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -0,0 +1,860 @@ +/************************************************************************** + * + * Copyright 2010 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_context.h> +#include <pipe/p_screen.h> + +#include <util/u_draw.h> +#include <util/u_sampler.h> + +#include <tgsi/tgsi_ureg.h> + +#include "vl_defines.h" +#include "vl_types.h" +#include "vl_vertex_buffers.h" +#include "vl_idct.h" + +enum VS_OUTPUT +{ + VS_O_VPOS, + VS_O_L_ADDR0, + VS_O_L_ADDR1, + VS_O_R_ADDR0, + VS_O_R_ADDR1 +}; + +/** + * The DCT matrix stored as hex representation of floats. Equal to the following equation: + * for (i = 0; i < 8; ++i) + * for (j = 0; j < 8; ++j) + * if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f); + * else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f)); + */ +static const uint32_t const_matrix[8][8] = { + { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 }, + { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf }, + { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f }, + { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 }, + { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 }, + { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 }, + { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 }, + { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 }, +}; + +static void +calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], + struct ureg_src tc, struct ureg_src start, bool right_side, + bool transposed, float size) +{ + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; + + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; + unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; + + /* + * addr[0..1].(start) = right_side ? start.x : tc.x + * addr[0..1].(tc) = right_side ? tc.y : start.y + * addr[0..1].z = tc.z + * addr[1].(start) += 1.0f / scale + */ + ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); + ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); + + ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); + ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); +} + +static void +increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], + struct ureg_src saddr[2], bool right_side, bool transposed, + int pos, float size) +{ + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; + + /* + * daddr[0..1].(start) = saddr[0..1].(start) + * daddr[0..1].(tc) = saddr[0..1].(tc) + */ + + ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); + ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); + ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); + ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); +} + +static void +fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], + struct ureg_src sampler, bool resource3d) +{ + ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler); + ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler); +} + +static void +matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) +{ + struct ureg_dst tmp; + + tmp = ureg_DECL_temporary(shader); + + /* + * tmp.xy = dot4(m[0][0..1], m[1][0..1]) + * dst = tmp.x + tmp.y + */ + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); + ureg_ADD(shader, dst, + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); + + ureg_release_temporary(shader, tmp); +} + +static void * +create_mismatch_vert_shader(struct vl_idct *idct) +{ + struct ureg_program *shader; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_tex; + struct ureg_dst o_vpos, o_addr[2]; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_tex = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + + o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); + o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); + + /* + * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) + * + * t_vpos = vpos + 7 / BLOCK_WIDTH + * o_vpos.xy = t_vpos * scale + * + * o_addr = calc_addr(...) + * + */ + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / idct->buffer_width, + (float)BLOCK_HEIGHT / idct->buffer_height); + + ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); + calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); + + ureg_release_temporary(shader, t_tex); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, idct->pipe); +} + +static void * +create_mismatch_frag_shader(struct vl_idct *idct) +{ + struct ureg_program *shader; + + struct ureg_src addr[2]; + + struct ureg_dst m[8][2]; + struct ureg_dst fragment; + + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + for (i = 0; i < 8; ++i) { + m[i][0] = ureg_DECL_temporary(shader); + m[i][1] = ureg_DECL_temporary(shader); + } + + for (i = 0; i < 8; ++i) { + increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); + } + + for (i = 0; i < 8; ++i) { + struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) }; + fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); + } + + for (i = 1; i < 8; ++i) { + ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); + ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); + } + + ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); + ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); + + ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); + ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); + ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); + + ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), + ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); + ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), + ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); + + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); + ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); + + for (i = 0; i < 8; ++i) { + ureg_release_temporary(shader, m[i][0]); + ureg_release_temporary(shader, m[i][1]); + } + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, idct->pipe); +} + +static void * +create_stage1_vert_shader(struct vl_idct *idct) +{ + struct ureg_program *shader; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_tex, t_start; + struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_tex = ureg_DECL_temporary(shader); + t_start = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); + + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); + + /* + * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) + * + * t_vpos = vpos + vrect + * o_vpos.xy = t_vpos * scale + * o_vpos.zw = vpos + * + * o_l_addr = calc_addr(...) + * o_r_addr = calc_addr(...) + * + */ + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / idct->buffer_width, + (float)BLOCK_HEIGHT / idct->buffer_height); + + ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); + + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); + + ureg_release_temporary(shader, t_tex); + ureg_release_temporary(shader, t_start); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, idct->pipe); +} + +static void * +create_stage1_frag_shader(struct vl_idct *idct) +{ + struct ureg_program *shader; + + struct ureg_src l_addr[2], r_addr[2]; + + struct ureg_dst l[4][2], r[2]; + struct ureg_dst fragment[idct->nr_of_render_targets]; + + int i, j; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + + for (i = 0; i < idct->nr_of_render_targets; ++i) + fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); + + for (i = 0; i < 4; ++i) { + l[i][0] = ureg_DECL_temporary(shader); + l[i][1] = ureg_DECL_temporary(shader); + } + + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); + + for (i = 0; i < 4; ++i) { + increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); + } + + for (i = 0; i < 4; ++i) { + struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) }; + fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); + } + + for (i = 0; i < idct->nr_of_render_targets; ++i) { + increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT); + + struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) }; + fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); + + for (j = 0; j < 4; ++j) { + matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); + } + } + + for (i = 0; i < 4; ++i) { + ureg_release_temporary(shader, l[i][0]); + ureg_release_temporary(shader, l[i][1]); + } + ureg_release_temporary(shader, r[0]); + ureg_release_temporary(shader, r[1]); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, idct->pipe); +} + +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex) +{ + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_start; + struct ureg_dst o_l_addr[2], o_r_addr[2]; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_start = ureg_DECL_temporary(shader); + + --first_output; + + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); + + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / idct->buffer_width, + (float)BLOCK_HEIGHT / idct->buffer_height); + + ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); + + ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); + ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); +} + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment) +{ + struct ureg_src l_addr[2], r_addr[2]; + + struct ureg_dst l[2], r[2]; + + --first_input; + + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + + l[0] = ureg_DECL_temporary(shader); + l[1] = ureg_DECL_temporary(shader); + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); + + fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false); + fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true); + + matrix_mul(shader, fragment, l, r); + + ureg_release_temporary(shader, l[0]); + ureg_release_temporary(shader, l[1]); + ureg_release_temporary(shader, r[0]); + ureg_release_temporary(shader, r[1]); +} + +static bool +init_shaders(struct vl_idct *idct) +{ + idct->vs_mismatch = create_mismatch_vert_shader(idct); + if (!idct->vs_mismatch) + goto error_vs_mismatch; + + idct->fs_mismatch = create_mismatch_frag_shader(idct); + if (!idct->fs_mismatch) + goto error_fs_mismatch; + + idct->vs = create_stage1_vert_shader(idct); + if (!idct->vs) + goto error_vs; + + idct->fs = create_stage1_frag_shader(idct); + if (!idct->fs) + goto error_fs; + + return true; + +error_fs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs); + +error_vs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); + +error_fs_mismatch: + idct->pipe->delete_vs_state(idct->pipe, idct->fs); + +error_vs_mismatch: + return false; +} + +static void +cleanup_shaders(struct vl_idct *idct) +{ + idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); + idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); + idct->pipe->delete_vs_state(idct->pipe, idct->vs); + idct->pipe->delete_fs_state(idct->pipe, idct->fs); +} + +static bool +init_state(struct vl_idct *idct) +{ + struct pipe_blend_state blend; + struct pipe_rasterizer_state rs_state; + struct pipe_sampler_state sampler; + unsigned i; + + assert(idct); + + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.point_size = 1; + rs_state.gl_rasterization_rules = true; + idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); + if (!idct->rs_state) + goto error_rs_state; + + memset(&blend, 0, sizeof blend); + + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 0; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.dither = 0; + idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend); + if (!idct->blend) + goto error_blend; + + for (i = 0; i < 2; ++i) { + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); + if (!idct->samplers[i]) + goto error_samplers; + } + + return true; + +error_samplers: + for (i = 0; i < 2; ++i) + if (idct->samplers[i]) + idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); + + idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); + +error_blend: + idct->pipe->delete_blend_state(idct->pipe, idct->blend); + +error_rs_state: + return false; +} + +static void +cleanup_state(struct vl_idct *idct) +{ + unsigned i; + + for (i = 0; i < 2; ++i) + idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); + + idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->delete_blend_state(idct->pipe, idct->blend); +} + +static bool +init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + struct pipe_resource *tex; + struct pipe_surface surf_templ; + + assert(idct && buffer); + + tex = buffer->sampler_views.individual.source->texture; + + buffer->fb_state_mismatch.width = tex->width0; + buffer->fb_state_mismatch.height = tex->height0; + buffer->fb_state_mismatch.nr_cbufs = 1; + + memset(&surf_templ, 0, sizeof(surf_templ)); + surf_templ.format = tex->format; + surf_templ.u.tex.first_layer = 0; + surf_templ.u.tex.last_layer = 0; + surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ); + + buffer->viewport_mismatch.scale[0] = tex->width0; + buffer->viewport_mismatch.scale[1] = tex->height0; + buffer->viewport_mismatch.scale[2] = 1; + buffer->viewport_mismatch.scale[3] = 1; + + return true; +} + +static void +cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + assert(idct && buffer); + + pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); + + pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); +} + +static bool +init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + struct pipe_resource *tex; + struct pipe_surface surf_templ; + unsigned i; + + assert(idct && buffer); + + tex = buffer->sampler_views.individual.intermediate->texture; + + buffer->fb_state.width = tex->width0; + buffer->fb_state.height = tex->height0; + buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; + for(i = 0; i < idct->nr_of_render_targets; ++i) { + memset(&surf_templ, 0, sizeof(surf_templ)); + surf_templ.format = tex->format; + surf_templ.u.tex.first_layer = i; + surf_templ.u.tex.last_layer = i; + surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + buffer->fb_state.cbufs[i] = idct->pipe->create_surface( + idct->pipe, tex, &surf_templ); + + if (!buffer->fb_state.cbufs[i]) + goto error_surfaces; + } + + buffer->viewport.scale[0] = tex->width0; + buffer->viewport.scale[1] = tex->height0; + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + + return true; + +error_surfaces: + for(i = 0; i < idct->nr_of_render_targets; ++i) + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); + + return false; +} + +static void +cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + unsigned i; + + assert(idct && buffer); + + for(i = 0; i < idct->nr_of_render_targets; ++i) + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); + + pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); +} + +struct pipe_sampler_view * +vl_idct_upload_matrix(struct pipe_context *pipe, float scale) +{ + struct pipe_resource tex_templ, *matrix; + struct pipe_sampler_view sv_templ, *sv; + struct pipe_transfer *buf_transfer; + unsigned i, j, pitch; + float *f; + + struct pipe_box rect = + { + 0, 0, 0, + BLOCK_WIDTH / 4, + BLOCK_HEIGHT, + 1 + }; + + assert(pipe); + + memset(&tex_templ, 0, sizeof(tex_templ)); + tex_templ.target = PIPE_TEXTURE_2D; + tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; + tex_templ.last_level = 0; + tex_templ.width0 = 2; + tex_templ.height0 = 8; + tex_templ.depth0 = 1; + tex_templ.array_size = 1; + tex_templ.usage = PIPE_USAGE_IMMUTABLE; + tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; + tex_templ.flags = 0; + + matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); + if (!matrix) + goto error_matrix; + + buf_transfer = pipe->get_transfer + ( + pipe, matrix, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + if (!buf_transfer) + goto error_transfer; + + pitch = buf_transfer->stride / sizeof(float); + + f = pipe->transfer_map(pipe, buf_transfer); + if (!f) + goto error_map; + + for(i = 0; i < BLOCK_HEIGHT; ++i) + for(j = 0; j < BLOCK_WIDTH; ++j) + // transpose and scale + f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; + + pipe->transfer_unmap(pipe, buf_transfer); + pipe->transfer_destroy(pipe, buf_transfer); + + memset(&sv_templ, 0, sizeof(sv_templ)); + u_sampler_view_default_template(&sv_templ, matrix, matrix->format); + sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); + pipe_resource_reference(&matrix, NULL); + if (!sv) + goto error_map; + + return sv; + +error_map: + pipe->transfer_destroy(pipe, buf_transfer); + +error_transfer: + pipe_resource_reference(&matrix, NULL); + +error_matrix: + return NULL; +} + +bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned nr_of_render_targets, + struct pipe_sampler_view *matrix, + struct pipe_sampler_view *transpose) +{ + assert(idct && pipe); + assert(matrix && transpose); + + idct->pipe = pipe; + idct->buffer_width = buffer_width; + idct->buffer_height = buffer_height; + idct->nr_of_render_targets = nr_of_render_targets; + + pipe_sampler_view_reference(&idct->matrix, matrix); + pipe_sampler_view_reference(&idct->transpose, transpose); + + if(!init_shaders(idct)) + return false; + + if(!init_state(idct)) { + cleanup_shaders(idct); + return false; + } + + return true; +} + +void +vl_idct_cleanup(struct vl_idct *idct) +{ + cleanup_shaders(idct); + cleanup_state(idct); + + pipe_sampler_view_reference(&idct->matrix, NULL); + pipe_sampler_view_reference(&idct->transpose, NULL); +} + +bool +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_sampler_view *source, + struct pipe_sampler_view *intermediate) +{ + assert(buffer && idct); + assert(source && intermediate); + + memset(buffer, 0, sizeof(struct vl_idct_buffer)); + + buffer->idct = idct; + + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); + pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); + pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); + pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate); + + if (!init_source(idct, buffer)) + return false; + + if (!init_intermediate(idct, buffer)) + return false; + + return true; +} + +void +vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) +{ + assert(buffer); + + cleanup_source(buffer->idct, buffer); + cleanup_intermediate(buffer->idct, buffer); + + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); + pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); +} + +void +vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_instances) +{ + struct vl_idct *idct; + assert(buffer); + + idct = buffer->idct; + + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_blend_state(idct->pipe, idct->blend); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); + idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); + + /* mismatch control */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch); + idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch); + idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch); + idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances); + + /* first stage */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); + idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->fs); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} + +void +vl_idct_prepare_stage2(struct vl_idct_buffer *buffer) +{ + assert(buffer); + + /* second stage */ + buffer->idct->pipe->bind_rasterizer_state(buffer->idct->pipe, buffer->idct->rs_state); + buffer->idct->pipe->bind_fragment_sampler_states(buffer->idct->pipe, 2, buffer->idct->samplers); + buffer->idct->pipe->set_fragment_sampler_views(buffer->idct->pipe, 2, buffer->sampler_views.stage[1]); +} + diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h new file mode 100644 index 00000000000..98e2c795564 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_idct.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2010 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_idct_h +#define vl_idct_h + +#include <pipe/p_state.h> + +#include <tgsi/tgsi_ureg.h> + +/* shader based inverse distinct cosinus transformation + * expect usage of vl_vertex_buffers as a todo list + */ +struct vl_idct +{ + struct pipe_context *pipe; + + unsigned buffer_width; + unsigned buffer_height; + unsigned nr_of_render_targets; + + void *rs_state; + void *blend; + + void *samplers[2]; + + void *vs_mismatch, *fs_mismatch; + void *vs, *fs; + + struct pipe_sampler_view *matrix; + struct pipe_sampler_view *transpose; +}; + +/* a set of buffers to work with */ +struct vl_idct_buffer +{ + struct vl_idct *idct; + + struct pipe_viewport_state viewport_mismatch; + struct pipe_viewport_state viewport; + + struct pipe_framebuffer_state fb_state_mismatch; + struct pipe_framebuffer_state fb_state; + + union + { + struct pipe_sampler_view *all[4]; + struct pipe_sampler_view *stage[2][2]; + struct { + struct pipe_sampler_view *source, *matrix; + struct pipe_sampler_view *intermediate, *transpose; + } individual; + } sampler_views; +}; + +/* upload the idct matrix, which can be shared by all idct instances of a pipe */ +struct pipe_sampler_view * +vl_idct_upload_matrix(struct pipe_context *pipe, float scale); + +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex); + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment); + +/* init an idct instance */ +bool +vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned nr_of_render_targets, + struct pipe_sampler_view *matrix, + struct pipe_sampler_view *transpose); + +/* destroy an idct instance */ +void +vl_idct_cleanup(struct vl_idct *idct); + +/* init a buffer assosiated with agiven idct instance */ +bool +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_sampler_view *source, + struct pipe_sampler_view *intermediate); + +/* cleanup a buffer of an idct instance */ +void +vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer); + +/* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */ +void +vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_verts); + +void +vl_idct_prepare_stage2(struct vl_idct_buffer *buffer); + +#endif diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c new file mode 100644 index 00000000000..bd05205b52d --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -0,0 +1,660 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_context.h> + +#include <util/u_sampler.h> +#include <util/u_draw.h> + +#include <tgsi/tgsi_ureg.h> + +#include "vl_defines.h" +#include "vl_vertex_buffers.h" +#include "vl_mc.h" +#include "vl_idct.h" + +enum VS_OUTPUT +{ + VS_O_VPOS, + VS_O_VTOP, + VS_O_VBOTTOM, + + VS_O_FLAGS = VS_O_VTOP, + VS_O_VTEX = VS_O_VBOTTOM +}; + +static struct ureg_dst +calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale) +{ + struct ureg_src vrect, vpos; + struct ureg_dst t_vpos; + struct ureg_dst o_vpos; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_vpos = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + + /* + * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height) + * + * t_vpos = (vpos + vrect) * block_scale + * o_vpos.xy = t_vpos + * o_vpos.zw = vpos + */ + ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + return t_vpos; +} + +static struct ureg_dst +calc_line(struct ureg_program *shader) +{ + struct ureg_dst tmp; + struct ureg_src pos; + + tmp = ureg_DECL_temporary(shader); + + pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR); + + /* + * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0 + */ + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f)); + ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp)); + ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); + + return tmp; +} + +static void * +create_ref_vert_shader(struct vl_mc *r) +{ + struct ureg_program *shader; + struct ureg_src mv_scale; + struct ureg_src vrect, vmv[2]; + struct ureg_dst t_vpos; + struct ureg_dst o_vpos, o_vmv[2]; + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); + vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); + + t_vpos = calc_position(r, shader, ureg_imm2f(shader, + (float)MACROBLOCK_WIDTH / r->buffer_width, + (float)MACROBLOCK_HEIGHT / r->buffer_height) + ); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); + o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); + + /* + * mv_scale.xy = 0.5 / (dst.width, dst.height); + * mv_scale.z = 1.0f / 4.0f + * mv_scale.w = 1.0f / 255.0f + * + * // Apply motion vectors + * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos + * o_vmv[0..1].zw = vmv[0..1] * mv_scale + * + */ + + mv_scale = ureg_imm4f(shader, + 0.5f / r->buffer_width, + 0.5f / r->buffer_height, + 1.0f / 4.0f, + 1.0f / PIPE_VIDEO_MV_WEIGHT_MAX); + + for (i = 0; i < 2; ++i) { + ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos)); + ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]); + } + + ureg_release_temporary(shader, t_vpos); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + +static void * +create_ref_frag_shader(struct vl_mc *r) +{ + const float y_scale = + r->buffer_height / 2 * + r->macroblock_size / MACROBLOCK_HEIGHT; + + struct ureg_program *shader; + struct ureg_src tc[2], sampler; + struct ureg_dst ref, field; + struct ureg_dst fragment; + unsigned label; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); + tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); + + sampler = ureg_DECL_sampler(shader, 0); + ref = ureg_DECL_temporary(shader); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + field = calc_line(shader); + + /* + * ref = field.z ? tc[1] : tc[0] + * + * // Adjust tc acording to top/bottom field selection + * if (|ref.z|) { + * ref.y *= y_scale + * ref.y = floor(ref.y) + * ref.y += ref.z + * ref.y /= y_scale + * } + * fragment.xyz = tex(ref, sampler[0]) + */ + ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ), + ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), + tc[1], tc[0]); + ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), + ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), + tc[1], tc[0]); + + ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label); + + ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), + ureg_src(ref), ureg_imm1f(shader, y_scale)); + ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref)); + ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), + ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z)); + ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), + ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + + ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler); + + ureg_release_temporary(shader, ref); + + ureg_release_temporary(shader, field); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + +static void * +create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv) +{ + struct ureg_program *shader; + + struct ureg_src vrect, vpos; + struct ureg_dst t_vpos, t_vtex; + struct ureg_dst o_vpos, o_flags; + + struct vertex2f scale = { + (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size, + (float)BLOCK_HEIGHT / r->buffer_height * MACROBLOCK_HEIGHT / r->macroblock_size + }; + + unsigned label; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y)); + t_vtex = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS); + + /* + * o_vtex.xy = t_vpos + * o_flags.z = intra * 0.5 + * + * if(interlaced) { + * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } + * t_vtex.z = vpos.y % 2 + * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y + * o_vpos.y = t_vtex.y + t_vpos.y + * + * o_flags.w = t_vtex.z ? 0 : 1 + * } + * + */ + + vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos); + + ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z), + ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); + ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); + + if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO + ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); + + ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)), + ureg_imm2f(shader, 0.0f, scale.y), + ureg_imm2f(shader, -scale.y, 0.0f)); + ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), + ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f)); + + ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex)); + + ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), + ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), + ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y)); + ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), + ureg_src(t_vpos), ureg_src(t_vtex)); + + ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), + ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), + ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + } + + ureg_release_temporary(shader, t_vtex); + ureg_release_temporary(shader, t_vpos); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + +static void * +create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert, + vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) +{ + struct ureg_program *shader; + struct ureg_src flags; + struct ureg_dst tmp; + struct ureg_dst fragment; + unsigned label; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + tmp = calc_line(shader); + + /* + * if (field == tc.w) + * kill(); + * else { + * fragment.xyz = tex(tc, sampler) * scale + tc.z + * fragment.w = 1.0f + * } + */ + + ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), + ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp)); + + ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label); + + ureg_KILP(shader); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ELSE(shader, &label); + + fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp); + + if (scale != 1.0f) + ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), + ureg_src(tmp), ureg_imm1f(shader, scale), + ureg_scalar(flags, TGSI_SWIZZLE_Z)); + else + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), + ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z)); + + ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f)); + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + + ureg_release_temporary(shader, tmp); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + +static bool +init_pipe_state(struct vl_mc *r) +{ + struct pipe_sampler_state sampler; + struct pipe_blend_state blend; + struct pipe_rasterizer_state rs_state; + unsigned i; + + assert(r); + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler); + if (!r->sampler_ref) + goto error_sampler_ref; + + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + memset(&blend, 0, sizeof blend); + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + blend.rt[0].colormask = i; + blend.dither = 0; + r->blend_clear[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_clear[i]) + goto error_blend; + + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_add[i]) + goto error_blend; + + blend.rt[0].rgb_func = PIPE_BLEND_REVERSE_SUBTRACT; + blend.rt[0].alpha_dst_factor = PIPE_BLEND_REVERSE_SUBTRACT; + r->blend_sub[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_sub[i]) + goto error_blend; + } + + memset(&rs_state, 0, sizeof(rs_state)); + /*rs_state.sprite_coord_enable */ + rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; + rs_state.point_quad_rasterization = true; + rs_state.point_size = BLOCK_WIDTH; + rs_state.gl_rasterization_rules = true; + r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state); + if (!r->rs_state) + goto error_rs_state; + + return true; + +error_rs_state: +error_blend: + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + if (r->blend_sub[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]); + + if (r->blend_add[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); + + if (r->blend_clear[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); + } + + r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); + +error_sampler_ref: + return false; +} + +static void +cleanup_pipe_state(struct vl_mc *r) +{ + unsigned i; + + assert(r); + + r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); + r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); + r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]); + } + r->pipe->delete_rasterizer_state(r->pipe, r->rs_state); +} + +bool +vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv) +{ + assert(renderer); + assert(pipe); + + memset(renderer, 0, sizeof(struct vl_mc)); + + renderer->pipe = pipe; + renderer->buffer_width = buffer_width; + renderer->buffer_height = buffer_height; + renderer->macroblock_size = macroblock_size; + + if (!init_pipe_state(renderer)) + goto error_pipe_state; + + renderer->vs_ref = create_ref_vert_shader(renderer); + if (!renderer->vs_ref) + goto error_vs_ref; + + renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv); + if (!renderer->vs_ycbcr) + goto error_vs_ycbcr; + + renderer->fs_ref = create_ref_frag_shader(renderer); + if (!renderer->fs_ref) + goto error_fs_ref; + + renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, false, fs_callback, callback_priv); + if (!renderer->fs_ycbcr) + goto error_fs_ycbcr; + + renderer->fs_ycbcr_sub = create_ycbcr_frag_shader(renderer, scale, true, fs_callback, callback_priv); + if (!renderer->fs_ycbcr_sub) + goto error_fs_ycbcr_sub; + + return true; + +error_fs_ycbcr_sub: + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr); + +error_fs_ycbcr: + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref); + +error_fs_ref: + renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr); + +error_vs_ycbcr: + renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref); + +error_vs_ref: + cleanup_pipe_state(renderer); + +error_pipe_state: + return false; +} + +void +vl_mc_cleanup(struct vl_mc *renderer) +{ + assert(renderer); + + cleanup_pipe_state(renderer); + + renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref); + renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr); + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref); + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr); + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr_sub); +} + +bool +vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer) +{ + assert(renderer && buffer); + + buffer->renderer = renderer; + + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + buffer->viewport.translate[0] = 0; + buffer->viewport.translate[1] = 0; + buffer->viewport.translate[2] = 0; + buffer->viewport.translate[3] = 0; + + buffer->fb_state.nr_cbufs = 1; + buffer->fb_state.zsbuf = NULL; + + return true; +} + +void +vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer) +{ + assert(buffer); +} + +void +vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface) +{ + assert(buffer && surface); + + buffer->surface_cleared = false; + + buffer->viewport.scale[0] = surface->width; + buffer->viewport.scale[1] = surface->height; + + buffer->fb_state.width = surface->width; + buffer->fb_state.height = surface->height; + buffer->fb_state.cbufs[0] = surface; +} + +static void +prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask) +{ + struct vl_mc *renderer; + + assert(buffer); + + renderer = buffer->renderer; + renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); + + if (buffer->surface_cleared) + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]); + else + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]); + + renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state); + renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport); +} + +void +vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) +{ + struct vl_mc *renderer; + + assert(buffer && ref); + + prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); + + renderer = buffer->renderer; + + renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref); + renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref); + + renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref); + renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref); + + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, + renderer->buffer_width / MACROBLOCK_WIDTH * + renderer->buffer_height / MACROBLOCK_HEIGHT); + + buffer->surface_cleared = true; +} + +void +vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances) +{ + struct vl_mc *renderer; + unsigned mask = 1 << component; + + assert(buffer); + + if (num_instances == 0) + return; + + prepare_pipe_4_rendering(buffer, mask); + + renderer = buffer->renderer; + + renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr); + renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr); + + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); + + if (buffer->surface_cleared) { + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_sub[mask]); + renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr_sub); + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); + } +} diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h new file mode 100644 index 00000000000..9fabf02a3ac --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_mc_h +#define vl_mc_h + +#include <pipe/p_state.h> +#include <pipe/p_video_state.h> + +#include <tgsi/tgsi_ureg.h> + +#include "vl_defines.h" +#include "vl_types.h" + +#define VL_MC_NUM_BLENDERS (1 << VL_MAX_PLANES) + +struct pipe_context; + +struct vl_mc +{ + struct pipe_context *pipe; + unsigned buffer_width; + unsigned buffer_height; + unsigned macroblock_size; + + void *rs_state; + + void *blend_clear[VL_MC_NUM_BLENDERS]; + void *blend_add[VL_MC_NUM_BLENDERS]; + void *blend_sub[VL_MC_NUM_BLENDERS]; + void *vs_ref, *vs_ycbcr; + void *fs_ref, *fs_ycbcr, *fs_ycbcr_sub; + void *sampler_ref; +}; + +struct vl_mc_buffer +{ + struct vl_mc *renderer; + + bool surface_cleared; + + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; +}; + +typedef void (*vl_mc_ycbcr_vert_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex); + +typedef void (*vl_mc_ycbcr_frag_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst); + +bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, + unsigned picture_width, unsigned picture_height, + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv); + +void vl_mc_cleanup(struct vl_mc *renderer); + +bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer); + +void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer); + +void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface); + +void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref); + +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances); + +#endif /* vl_mc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c new file mode 100644 index 00000000000..7a14efb627e --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -0,0 +1,1836 @@ +/************************************************************************** + * + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * This file is based uppon slice_xvmc.c and vlc.h from the xine project, + * which in turn is based on mpeg2dec. The following is the original copyright: + * + * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> + * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdint.h> + +#include <pipe/p_video_state.h> + +#include "vl_vlc.h" +#include "vl_mpeg12_bitstream.h" + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num))) + +/* macroblock modes */ +#define MACROBLOCK_INTRA 1 +#define MACROBLOCK_PATTERN 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 + +/* motion_type */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* picture coding type (mpeg2 header) */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +static const MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + +static const CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +}; + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; + +static const int non_linear_quantizer_scale[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int +get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) +{ + int macroblock_modes; + const MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + vl_vlc_ubits(&bs->vlc, 1); + vl_vlc_dumpbits(&bs->vlc, tab->len); + macroblock_modes = tab->modes; + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + vl_vlc_ubits(&bs->vlc, 5); + vl_vlc_dumpbits(&bs->vlc, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; + vl_vlc_dumpbits(&bs->vlc, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; + vl_vlc_dumpbits(&bs->vlc, 2); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + vl_vlc_ubits(&bs->vlc, 6); + vl_vlc_dumpbits(&bs->vlc, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; + vl_vlc_dumpbits(&bs->vlc, 2); + } + } else if (picture->frame_pred_frame_dct) { + macroblock_modes |= MC_FRAME; + } else if (!(macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; + vl_vlc_dumpbits(&bs->vlc, 2); + } + return macroblock_modes; + + case D_TYPE: + + vl_vlc_dumpbits(&bs->vlc, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +} + +static inline enum pipe_mpeg12_dct_type +get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes) +{ + enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME; + + if ((picture->picture_structure == FRAME_PICTURE) && + (!picture->frame_pred_frame_dct) && + (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) { + + dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME; + vl_vlc_dumpbits(&bs->vlc, 1); + } + return dct_type; +} + +static inline int +get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) +{ + int quantizer_scale_code; + + quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5); + vl_vlc_dumpbits(&bs->vlc, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale[quantizer_scale_code]; + else + return quantizer_scale_code << 1; +} + +static inline int +get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code) +{ + int delta; + int sign; + const MVtab * tab; + + if (bs->vlc.buf & 0x80000000) { + vl_vlc_dumpbits(&bs->vlc, 1); + return 0; + } else if (bs->vlc.buf >= 0x0c000000) { + + tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4); + delta = (tab->delta << f_code) + 1; + bs->vlc.bits += tab->len + f_code + 1; + bs->vlc.buf <<= tab->len; + + sign = vl_vlc_sbits(&bs->vlc, 1); + bs->vlc.buf <<= 1; + + if (f_code) + delta += vl_vlc_ubits(&bs->vlc, f_code); + bs->vlc.buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10); + delta = (tab->delta << f_code) + 1; + bs->vlc.bits += tab->len + 1; + bs->vlc.buf <<= tab->len; + + sign = vl_vlc_sbits(&bs->vlc, 1); + bs->vlc.buf <<= 1; + + if (f_code) { + vl_vlc_needbits(&bs->vlc); + delta += vl_vlc_ubits(&bs->vlc, f_code); + vl_vlc_dumpbits(&bs->vlc, f_code); + } + + return (delta ^ sign) - sign; + } +} + +static inline int +bound_motion_vector(int vec, unsigned f_code) +{ +#if 1 + unsigned int limit; + int sign; + + limit = 16 << f_code; + + if ((unsigned int)(vec + limit) < 2 * limit) + return vec; + else { + sign = ((int32_t)vec) >> 31; + return vec - ((2 * limit) ^ sign) + sign; + } +#else + return ((int32_t)vec << (28 - f_code)) >> (28 - f_code); +#endif +} + +static inline int +get_dmv(struct vl_mpg12_bs *bs) +{ + const DMVtab * tab; + + tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2); + vl_vlc_dumpbits(&bs->vlc, tab->len); + return tab->dmv; +} + +static inline int +get_coded_block_pattern(struct vl_mpg12_bs *bs) +{ + const CBPtab * tab; + + vl_vlc_needbits(&bs->vlc); + + if (bs->vlc.buf >= 0x20000000) { + + tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16); + vl_vlc_dumpbits(&bs->vlc, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9); + vl_vlc_dumpbits(&bs->vlc, tab->len); + return tab->cbp; + } +} + +static inline int +get_luma_dc_dct_diff(struct vl_mpg12_bs *bs) +{ + const DCtab * tab; + int size; + int dc_diff; + + if (bs->vlc.buf < 0xf8000000) { + tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5); + size = tab->size; + if (size) { + bs->vlc.bits += tab->len + size; + bs->vlc.buf <<= tab->len; + dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); + bs->vlc.buf <<= size; + return dc_diff; + } else { + vl_vlc_dumpbits(&bs->vlc, 3); + return 0; + } + } else { + tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0); + size = tab->size; + vl_vlc_dumpbits(&bs->vlc, tab->len); + vl_vlc_needbits(&bs->vlc); + dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); + vl_vlc_dumpbits(&bs->vlc, size); + return dc_diff; + } +} + +static inline int +get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs) +{ + const DCtab * tab; + int size; + int dc_diff; + + if (bs->vlc.buf < 0xf8000000) { + tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5); + size = tab->size; + if (size) { + bs->vlc.bits += tab->len + size; + bs->vlc.buf <<= tab->len; + dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); + bs->vlc.buf <<= size; + return dc_diff; + } else { + vl_vlc_dumpbits(&bs->vlc, 2); + return 0; + } + } else { + tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0); + size = tab->size; + vl_vlc_dumpbits(&bs->vlc, tab->len + 1); + vl_vlc_needbits(&bs->vlc); + dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); + vl_vlc_dumpbits(&bs->vlc, size); + return dc_diff; + } +} + +static inline void +get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +{ + int i, val; + const DCTtab *tab; + + i = 0; + + vl_vlc_needbits(&bs->vlc); + + while (1) { + if (bs->vlc.buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + bs->vlc.buf <<= tab->len; + bs->vlc.bits += tab->len + 1; + val = tab->level * quantizer_scale; + + val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + + dest[i] = val; + + bs->vlc.buf <<= 1; + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x04000000) { + + tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS(bs->vlc.buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; + + dest[i] = val; + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x02000000) { + tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00800000) { + tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00200000) { + tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); + bs->vlc.buf <<= 16; + vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + + vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +} + +static inline void +get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +{ + int i, val; + const DCTtab * tab; + + i = 0; + + vl_vlc_needbits(&bs->vlc); + + while (1) { + if (bs->vlc.buf >= 0x04000000) { + + tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + bs->vlc.buf <<= tab->len; + bs->vlc.bits += tab->len + 1; + val = tab->level * quantizer_scale; + + val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + + dest[i] = val; + + bs->vlc.buf <<= 1; + vl_vlc_needbits(&bs->vlc); + + continue; + + } else { + + /* end of block. I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS(bs->vlc.buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; + + dest[i] = val; + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + + continue; + + } + } else if (bs->vlc.buf >= 0x02000000) { + tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00800000) { + tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00200000) { + tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); + bs->vlc.buf <<= 16; + vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + + vl_vlc_dumpbits(&bs->vlc, 4); /* dump end of block code */ +} + +static inline void +get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +{ + int i, val; + const DCTtab *tab; + + i = -1; + + vl_vlc_needbits(&bs->vlc); + if (bs->vlc.buf >= 0x28000000) { + tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bs->vlc.buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + bs->vlc.buf <<= tab->len; + bs->vlc.bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale) >> 1; + + val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + + dest[i] = val; + + bs->vlc.buf <<= 1; + vl_vlc_needbits(&bs->vlc); + + continue; + + } + + entry_2: + if (bs->vlc.buf >= 0x04000000) { + + tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS(bs->vlc.buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; + val = (val * quantizer_scale) / 2; + + dest[i] = val; + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x02000000) { + tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00800000) { + tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00200000) { + tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); + bs->vlc.buf <<= 16; + vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +} + +static inline void +get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +{ + int i, val; + const DCTtab * tab; + + i = 0; + + vl_vlc_needbits(&bs->vlc); + + while (1) { + if (bs->vlc.buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + bs->vlc.buf <<= tab->len; + bs->vlc.bits += tab->len + 1; + val = tab->level * quantizer_scale; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + + dest[i] = val; + + bs->vlc.buf <<= 1; + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x04000000) { + + tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS(bs->vlc.buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + val = vl_vlc_sbits(&bs->vlc, 8); + if (! (val & 0x7f)) { + vl_vlc_dumpbits(&bs->vlc, 8); + val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + } + val = val * quantizer_scale; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + dest[i] = val; + + vl_vlc_dumpbits(&bs->vlc, 8); + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x02000000) { + tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00800000) { + tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00200000) { + tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); + bs->vlc.buf <<= 16; + vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +} + +static inline void +get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +{ + int i, val; + const DCTtab * tab; + + i = -1; + + vl_vlc_needbits(&bs->vlc); + if (bs->vlc.buf >= 0x28000000) { + tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bs->vlc.buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + bs->vlc.buf <<= tab->len; + bs->vlc.bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale) >> 1; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + + dest[i] = val; + + bs->vlc.buf <<= 1; + vl_vlc_needbits(&bs->vlc); + + continue; + + } + + entry_2: + if (bs->vlc.buf >= 0x04000000) { + + tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS(bs->vlc.buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + vl_vlc_dumpbits(&bs->vlc, 12); + vl_vlc_needbits(&bs->vlc); + val = vl_vlc_sbits(&bs->vlc, 8); + if (! (val & 0x7f)) { + vl_vlc_dumpbits(&bs->vlc, 8); + val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale) / 2; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + dest[i] = val; + + vl_vlc_dumpbits(&bs->vlc, 8); + vl_vlc_needbits(&bs->vlc); + + continue; + + } else if (bs->vlc.buf >= 0x02000000) { + tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00800000) { + tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bs->vlc.buf >= 0x00200000) { + tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); + bs->vlc.buf <<= 16; + vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +} + +static inline void +slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, + unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3]) +{ + short dest[64]; + + bs->ycbcr_stream[cc]->x = x; + bs->ycbcr_stream[cc]->y = y; + bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA; + bs->ycbcr_stream[cc]->coding = coding; + + vl_vlc_needbits(&bs->vlc); + + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + dc_dct_pred[0] += get_luma_dc_dct_diff(bs); + else + dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs); + + memset(dest, 0, sizeof(int16_t) * 64); + dest[0] = dc_dct_pred[cc]; + if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { + if (picture->picture_coding_type != D_TYPE) + get_mpeg1_intra_block(bs, quantizer_scale, dest); + } else if (picture->intra_vlc_format) + get_intra_block_B15(bs, quantizer_scale, dest); + else + get_intra_block_B14(bs, quantizer_scale, dest); + + memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); + + bs->num_ycbcr_blocks[cc]++; + bs->ycbcr_stream[cc]++; + bs->ycbcr_buffer[cc] += 64; +} + +static inline void +slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, + unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale) +{ + short dest[64]; + + bs->ycbcr_stream[cc]->x = x; + bs->ycbcr_stream[cc]->y = y; + bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA; + bs->ycbcr_stream[cc]->coding = coding; + + memset(dest, 0, sizeof(int16_t) * 64); + if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) + get_mpeg1_non_intra_block(bs, quantizer_scale, dest); + else + get_non_intra_block(bs, quantizer_scale, dest); + + memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); + + bs->num_ycbcr_blocks[cc]++; + bs->ycbcr_stream[cc]++; + bs->ycbcr_buffer[cc] += 64; +} + +static inline void +motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + + vl_vlc_needbits(&bs->vlc); + motion_x = (mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1])); + motion_x = bound_motion_vector (motion_x, f_code[0] + f_code[1]); + mv->top.x = mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = (mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1])); + motion_y = bound_motion_vector (motion_y, f_code[0] + f_code[1]); + mv->top.y = mv->bottom.y = motion_y; +} + +static inline void +motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + + vl_vlc_needbits(&bs->vlc); + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector(motion_x, f_code[0]); + mv->top.x = mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); + motion_y = bound_motion_vector(motion_y, f_code[1]); + mv->top.y = mv->bottom.y = motion_y; +} + +static inline void +motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + vl_vlc_needbits(&bs->vlc); + mv->top.field_select = vl_vlc_ubits(&bs->vlc, 1) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + vl_vlc_dumpbits(&bs->vlc, 1); + + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->top.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); + /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ + mv->top.y = motion_y << 1; + + vl_vlc_needbits(&bs->vlc); + mv->bottom.field_select = vl_vlc_ubits(&bs->vlc, 1) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + vl_vlc_dumpbits(&bs->vlc, 1); + + motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]); + /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ + mv->bottom.y = motion_y << 1; +} + +static inline void +motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + // TODO Implement dmv + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + + vl_vlc_needbits(&bs->vlc); + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector(motion_x, f_code[0]); + mv->top.x = mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); + /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ + mv->top.y = mv->bottom.y = motion_y << 1; +} + +/* like motion_frame, but parsing without actual motion compensation */ +static inline void +motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int tmp; + + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + + vl_vlc_needbits(&bs->vlc); + tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); + tmp = bound_motion_vector (tmp, f_code[0]); + mv->top.x = mv->bottom.x = tmp; + + vl_vlc_needbits(&bs->vlc); + tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); + tmp = bound_motion_vector (tmp, f_code[1]); + mv->top.y = mv->bottom.y = tmp; + + vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ +} + +static inline void +motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + vl_vlc_needbits(&bs->vlc); + + // ref_field + //vl_vlc_ubits(&bs->vlc, 1); + + // TODO field select may need to do something here for bob (weave ok) + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + vl_vlc_dumpbits(&bs->vlc, 1); + + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->top.x = mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); + motion_y = bound_motion_vector (motion_y, f_code[1]); + mv->top.y = mv->bottom.y = motion_y; +} + +static inline void +motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + vl_vlc_needbits(&bs->vlc); + + // ref_field + //vl_vlc_ubits(&bs->vlc, 1); + + // TODO field select may need to do something here bob (weave ok) + mv->top.field_select = PIPE_VIDEO_FRAME; + vl_vlc_dumpbits(&bs->vlc, 1); + + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->top.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); + motion_y = bound_motion_vector (motion_y, f_code[1]); + mv->top.y = motion_y; + + vl_vlc_needbits(&bs->vlc); + // ref_field + //vl_vlc_ubits(&bs->vlc, 1); + + // TODO field select may need to do something here for bob (weave ok) + mv->bottom.field_select = PIPE_VIDEO_FRAME; + vl_vlc_dumpbits(&bs->vlc, 1); + + motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = mv->bottom.y + get_motion_delta(bs, f_code[1]); + motion_y = bound_motion_vector (motion_y, f_code[1]); + mv->bottom.y = motion_y; +} + +static inline void +motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int motion_x, motion_y; + + // TODO field select may need to do something here for bob (weave ok) + mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; + + vl_vlc_needbits(&bs->vlc); + motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); + motion_x = bound_motion_vector (motion_x, f_code[0]); + mv->top.x = mv->bottom.x = motion_x; + + vl_vlc_needbits(&bs->vlc); + motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); + motion_y = bound_motion_vector (motion_y, f_code[1]); + mv->top.y = mv->bottom.y = motion_y; +} + + +static inline void +motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) +{ + int tmp; + + vl_vlc_needbits(&bs->vlc); + vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */ + + tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); + tmp = bound_motion_vector(tmp, f_code[0]); + mv->top.x = mv->bottom.x = tmp; + + vl_vlc_needbits(&bs->vlc); + tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); + tmp = bound_motion_vector(tmp, f_code[1]); + mv->top.y = mv->bottom.y = tmp; + + vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ +} + +#define MOTION_CALL(routine, macroblock_modes) \ +do { \ + if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD) \ + routine(bs, picture->f_code[0], &mv_fwd); \ + if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD) \ + routine(bs, picture->f_code[1], &mv_bwd); \ +} while (0) + +static inline void +store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos, + struct pipe_motionvector *mv_fwd, + struct pipe_motionvector *mv_bwd) +{ + bs->mv_stream[0][*mv_pos].top = mv_fwd->top; + bs->mv_stream[0][*mv_pos].bottom = + mv_fwd->top.field_select == PIPE_VIDEO_FRAME ? + mv_fwd->top : mv_fwd->bottom; + + bs->mv_stream[1][*mv_pos].top = mv_bwd->top; + bs->mv_stream[1][*mv_pos].bottom = + mv_bwd->top.field_select == PIPE_VIDEO_FRAME ? + mv_bwd->top : mv_bwd->bottom; + + (*mv_pos)++; +} + +static inline bool +slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, + int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos) +{ + const MBAtab * mba; + + vl_vlc_need32bits(&bs->vlc); + while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) { + if(!vl_vlc_getbyte(&bs->vlc)) + return false; + } + *y = (bs->vlc.buf & 0xFF) - 1; + vl_vlc_restart(&bs->vlc); + + *quantizer_scale = get_quantizer_scale(bs, picture); + + /* ignore intra_slice and all the extra data */ + while (bs->vlc.buf & 0x80000000) { + vl_vlc_dumpbits(&bs->vlc, 9); + vl_vlc_needbits(&bs->vlc); + } + + /* decode initial macroblock address increment */ + *x = 0; + while (1) { + if (bs->vlc.buf >= 0x08000000) { + mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2); + break; + } else if (bs->vlc.buf >= 0x01800000) { + mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24); + break; + } else switch (vl_vlc_ubits(&bs->vlc, 12)) { + case 8: /* macroblock_escape */ + *x += 33; + vl_vlc_dumpbits(&bs->vlc, 11); + vl_vlc_needbits(&bs->vlc); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bs->vlc.buf &= 0xfffff; + vl_vlc_dumpbits(&bs->vlc, 11); + vl_vlc_needbits(&bs->vlc); + continue; + default: /* error */ + return false; + } + } + vl_vlc_dumpbits(&bs->vlc, mba->len + 1); + *x += mba->mba; + + while (*x >= bs->width) { + *x -= bs->width; + (*y)++; + } + if (*y > bs->height) + return false; + + *mv_pos = *x + *y * bs->width; + + return true; +} + +static inline bool +decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) +{ + enum pipe_video_field_select default_field_select; + struct pipe_motionvector mv_fwd, mv_bwd; + enum pipe_mpeg12_dct_type dct_type; + + /* predictor for DC coefficients in intra blocks */ + int dc_dct_pred[3] = { 0, 0, 0 }; + int quantizer_scale; + + unsigned x, y, mv_pos; + + switch(picture->picture_structure) { + case TOP_FIELD: + default_field_select = PIPE_VIDEO_TOP_FIELD; + break; + + case BOTTOM_FIELD: + default_field_select = PIPE_VIDEO_BOTTOM_FIELD; + break; + + default: + default_field_select = PIPE_VIDEO_FRAME; + break; + } + + if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos)) + return false; + + mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; + mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; + + mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; + mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + + while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + + vl_vlc_needbits(&bs->vlc); + + macroblock_modes = get_macroblock_modes(bs, picture); + dct_type = get_dct_type(bs, picture, macroblock_modes); + + switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) { + case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD): + mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; + mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; + break; + + default: + mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; + mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + + /* fall through */ + case MACROBLOCK_MOTION_FORWARD: + mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + break; + + case MACROBLOCK_MOTION_BACKWARD: + mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + break; + } + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + quantizer_scale = get_quantizer_scale(bs, picture); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + if (picture->concealment_motion_vectors) { + if (picture->picture_structure == FRAME_PICTURE) + motion_fr_conceal(bs, picture->f_code[0], &mv_fwd); + else + motion_fi_conceal(bs, picture->f_code[0], &mv_fwd); + + } else { + mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; + mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; + } + mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + + // unravaled loop of 6 block(i) calls in macroblock() + slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + + if (picture->picture_coding_type == D_TYPE) { + vl_vlc_needbits(&bs->vlc); + vl_vlc_dumpbits(&bs->vlc, 1); + } + + } else { + if (picture->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { + MOTION_CALL(motion_mp1, macroblock_modes); + } else { + MOTION_CALL(motion_fr_frame, macroblock_modes); + } + break; + + case MC_FIELD: + MOTION_CALL (motion_fr_field, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; + mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION_CALL (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION_CALL (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; + mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern = get_coded_block_pattern(bs); + + // TODO optimize not fully used for idct accel only mc. + if (coded_block_pattern & 0x20) + slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 + if (coded_block_pattern & 0x10) + slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 + if (coded_block_pattern & 0x08) + slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 + if (coded_block_pattern & 0x04) + slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 + if (coded_block_pattern & 0x2) + slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma + if (coded_block_pattern & 0x1) + slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma + } + + dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; + } + + store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); + if (++x >= bs->width) { + ++y; + if (y >= bs->height) + return false; + x -= bs->width; + } + + vl_vlc_needbits(&bs->vlc); + mba_inc = 0; + while (1) { + if (bs->vlc.buf >= 0x10000000) { + mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2); + break; + } else if (bs->vlc.buf >= 0x03000000) { + mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24); + break; + } else switch (vl_vlc_ubits(&bs->vlc, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + vl_vlc_dumpbits(&bs->vlc, 11); + vl_vlc_needbits(&bs->vlc); + continue; + default: /* end of slice, or error */ + return true; + } + } + vl_vlc_dumpbits(&bs->vlc, mba->len); + mba_inc += mba->mba; + if (mba_inc) { + //TODO conversion to signed format signed format + dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; + + mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; + mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + + if (picture->picture_coding_type == P_TYPE) { + mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; + mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + } + + x += mba_inc; + do { + store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); + } while (--mba_inc); + } + while (x >= bs->width) { + ++y; + if (y >= bs->height) + return false; + x -= bs->width; + } + } +} + +void +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height) +{ + assert(bs); + + memset(bs, 0, sizeof(struct vl_mpg12_bs)); + + bs->width = width; + bs->height = height; +} + +void +vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], + short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]) +{ + unsigned i; + + assert(bs); + assert(ycbcr_stream && ycbcr_buffer); + assert(mv_stream); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + bs->ycbcr_stream[i] = ycbcr_stream[i]; + bs->ycbcr_buffer[i] = ycbcr_buffer[i]; + } + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) + bs->mv_stream[i] = mv_stream[i]; + + // TODO + for (i = 0; i < bs->width*bs->height; ++i) { + bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0; + bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME; + bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + bs->mv_stream[0][i].bottom.x = bs->mv_stream[0][i].bottom.y = 0; + bs->mv_stream[0][i].bottom.field_select = PIPE_VIDEO_FRAME; + bs->mv_stream[0][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + + bs->mv_stream[1][i].top.x = bs->mv_stream[1][i].top.y = 0; + bs->mv_stream[1][i].top.field_select = PIPE_VIDEO_FRAME; + bs->mv_stream[1][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + bs->mv_stream[1][i].bottom.x = bs->mv_stream[1][i].bottom.y = 0; + bs->mv_stream[1][i].bottom.field_select = PIPE_VIDEO_FRAME; + bs->mv_stream[1][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; + } +} + +void +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, + struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]) +{ + assert(bs); + assert(num_ycbcr_blocks); + assert(buffer && num_bytes); + + bs->num_ycbcr_blocks = num_ycbcr_blocks; + + vl_vlc_init(&bs->vlc, buffer, num_bytes); + + while(decode_slice(bs, picture)); +} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h new file mode 100644 index 00000000000..4e48a9faa2f --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_mpeg12_bitstream_h +#define vl_mpeg12_bitstream_h + +#include "vl_defines.h" +#include "vl_vlc.h" + +struct vl_mpg12_bs +{ + unsigned width, height; + + struct vl_vlc vlc; + + unsigned *num_ycbcr_blocks; + + struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; + short *ycbcr_buffer[VL_MAX_PLANES]; + + struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; +}; + +void +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height); + +void +vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], + short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]); + +void +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, + struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]); + +#endif /* vl_mpeg12_bitstream_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c new file mode 100644 index 00000000000..98b0adabb2b --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -0,0 +1,968 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <math.h> +#include <assert.h> + +#include <util/u_memory.h> +#include <util/u_rect.h> +#include <util/u_video.h> + +#include "vl_mpeg12_decoder.h" +#include "vl_defines.h" + +#define SCALE_FACTOR_SNORM (32768.0f / 256.0f) +#define SCALE_FACTOR_SSCALED (1.0f / 256.0f) + +struct format_config { + enum pipe_format zscan_source_format; + enum pipe_format idct_source_format; + enum pipe_format mc_source_format; + + float idct_scale; + float mc_scale; +}; + +static const struct format_config bitstream_format_config[] = { + { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED }, + { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED }, + { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM }, + { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM } +}; + +static const unsigned num_bitstream_format_configs = + sizeof(bitstream_format_config) / sizeof(struct format_config); + +static const struct format_config idct_format_config[] = { + { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED }, + { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED }, + { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM }, + { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM } +}; + +static const unsigned num_idct_format_configs = + sizeof(idct_format_config) / sizeof(struct format_config); + +static const struct format_config mc_format_config[] = { + //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED }, + { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM } +}; + +static const unsigned num_mc_format_configs = + sizeof(mc_format_config) / sizeof(struct format_config); + +static bool +init_zscan_buffer(struct vl_mpeg12_buffer *buffer) +{ + enum pipe_format formats[3]; + + struct pipe_sampler_view **source; + struct pipe_surface **destination; + + struct vl_mpeg12_decoder *dec; + + unsigned i; + + assert(buffer); + + dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + + formats[0] = formats[1] = formats[2] = dec->zscan_source_format; + buffer->zscan_source = vl_video_buffer_create_ex + ( + dec->base.context, + dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, + align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line, + 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC + ); + + if (!buffer->zscan_source) + goto error_source; + + source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source); + if (!source) + goto error_sampler; + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + destination = dec->idct_source->get_surfaces(dec->idct_source); + else + destination = dec->mc_source->get_surfaces(dec->mc_source); + + if (!destination) + goto error_surface; + + for (i = 0; i < VL_MAX_PLANES; ++i) + if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c, + &buffer->zscan[i], source[i], destination[i])) + goto error_plane; + + return true; + +error_plane: + for (; i > 0; --i) + vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]); + +error_surface: +error_sampler: + buffer->zscan_source->destroy(buffer->zscan_source); + +error_source: + return false; +} + +static void +cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer) +{ + unsigned i; + + assert(buffer); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_cleanup_buffer(&buffer->zscan[i]); + buffer->zscan_source->destroy(buffer->zscan_source); +} + +static bool +init_idct_buffer(struct vl_mpeg12_buffer *buffer) +{ + struct pipe_sampler_view **idct_source_sv, **mc_source_sv; + + struct vl_mpeg12_decoder *dec; + + unsigned i; + + assert(buffer); + + dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + + idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source); + if (!idct_source_sv) + goto error_source_sv; + + mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); + if (!mc_source_sv) + goto error_mc_source_sv; + + for (i = 0; i < 3; ++i) + if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c, + &buffer->idct[i], idct_source_sv[i], + mc_source_sv[i])) + goto error_plane; + + return true; + +error_plane: + for (; i > 0; --i) + vl_idct_cleanup_buffer(&buffer->idct[i - 1]); + +error_mc_source_sv: +error_source_sv: + return false; +} + +static void +cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) +{ + struct vl_mpeg12_decoder *dec; + unsigned i; + + assert(buf); + + dec = (struct vl_mpeg12_decoder*)buf->base.decoder; + assert(dec); + + for (i = 0; i < 3; ++i) + vl_idct_cleanup_buffer(&buf->idct[0]); +} + +static bool +init_mc_buffer(struct vl_mpeg12_buffer *buf) +{ + struct vl_mpeg12_decoder *dec; + + assert(buf); + + dec = (struct vl_mpeg12_decoder*)buf->base.decoder; + assert(dec); + + if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) + goto error_mc_y; + + if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1])) + goto error_mc_cb; + + if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2])) + goto error_mc_cr; + + return true; + +error_mc_cr: + vl_mc_cleanup_buffer(&buf->mc[1]); + +error_mc_cb: + vl_mc_cleanup_buffer(&buf->mc[0]); + +error_mc_y: + return false; +} + +static void +cleanup_mc_buffer(struct vl_mpeg12_buffer *buf) +{ + unsigned i; + + assert(buf); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_mc_cleanup_buffer(&buf->mc[i]); +} + +static void +vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + struct vl_mpeg12_decoder *dec; + + assert(buf); + + dec = (struct vl_mpeg12_decoder*)buf->base.decoder; + assert(dec); + + cleanup_zscan_buffer(buf); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + cleanup_idct_buffer(buf); + + cleanup_mc_buffer(buf); + + vl_vb_cleanup(&buf->vertex_stream); + + FREE(buf); +} + +static void +vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + struct vl_mpeg12_decoder *dec; + + struct pipe_sampler_view **sampler_views; + unsigned i; + + assert(buf); + + dec = (struct vl_mpeg12_decoder *)buf->base.decoder; + assert(dec); + + vl_vb_map(&buf->vertex_stream, dec->base.context); + + sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source); + + assert(sampler_views); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + struct pipe_resource *tex = sampler_views[i]->texture; + struct pipe_box rect = + { + 0, 0, 0, + tex->width0, + tex->height0, + 1 + }; + + buf->tex_transfer[i] = dec->base.context->get_transfer + ( + dec->base.context, tex, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + + buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]); + } + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; + struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; + + for (i = 0; i < VL_MAX_PLANES; ++i) + ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) + mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + + vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream); + } else { + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + } +} + +static void +vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer, + const uint8_t intra_matrix[64], + const uint8_t non_intra_matrix[64]) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + unsigned i; + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, non_intra_matrix); +} + +static struct pipe_ycbcr_block * +vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + + assert(buf); + + return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component); +} + +static short * +vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + + assert(buf); + assert(component < VL_MAX_PLANES); + + return buf->texels[component]; +} + +static unsigned +vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + + assert(buf); + + return vl_vb_get_mv_stream_stride(&buf->vertex_stream); +} + +static struct pipe_motionvector * +vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + + assert(buf); + + return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame); +} + +static void +vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer, + unsigned num_bytes, const void *data, + struct pipe_mpeg12_picture_desc *picture, + unsigned num_ycbcr_blocks[3]) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + struct vl_mpeg12_decoder *dec; + unsigned i; + + assert(buf); + + dec = (struct vl_mpeg12_decoder *)buf->base.decoder; + assert(dec); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal); + + vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks); +} + +static void +vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + struct vl_mpeg12_decoder *dec; + unsigned i; + + assert(buf); + + dec = (struct vl_mpeg12_decoder *)buf->base.decoder; + assert(dec); + + vl_vb_unmap(&buf->vertex_stream, dec->base.context); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]); + dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]); + } +} + +static void +vl_mpeg12_destroy(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; + + assert(decoder); + + /* Asserted in softpipe_delete_fs_state() for some reason */ + dec->base.context->bind_vs_state(dec->base.context, NULL); + dec->base.context->bind_fs_state(dec->base.context, NULL); + + dec->base.context->delete_depth_stencil_alpha_state(dec->base.context, dec->dsa); + dec->base.context->delete_sampler_state(dec->base.context, dec->sampler_ycbcr); + + vl_mc_cleanup(&dec->mc_y); + vl_mc_cleanup(&dec->mc_c); + dec->mc_source->destroy(dec->mc_source); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + vl_idct_cleanup(&dec->idct_y); + vl_idct_cleanup(&dec->idct_c); + dec->idct_source->destroy(dec->idct_source); + } + + vl_zscan_cleanup(&dec->zscan_y); + vl_zscan_cleanup(&dec->zscan_c); + + dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_ycbcr); + dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_mv); + + pipe_resource_reference(&dec->quads.buffer, NULL); + pipe_resource_reference(&dec->pos.buffer, NULL); + pipe_resource_reference(&dec->block_num.buffer, NULL); + + pipe_sampler_view_reference(&dec->zscan_linear, NULL); + pipe_sampler_view_reference(&dec->zscan_normal, NULL); + pipe_sampler_view_reference(&dec->zscan_alternate, NULL); + + FREE(dec); +} + +static struct pipe_video_decode_buffer * +vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; + struct vl_mpeg12_buffer *buffer; + + assert(dec); + + buffer = CALLOC_STRUCT(vl_mpeg12_buffer); + if (buffer == NULL) + return NULL; + + buffer->base.decoder = decoder; + buffer->base.destroy = vl_mpeg12_buffer_destroy; + buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame; + buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix; + buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream; + buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer; + buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride; + buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream; + buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream; + buffer->base.end_frame = vl_mpeg12_buffer_end_frame; + + if (!vl_vb_init(&buffer->vertex_stream, dec->base.context, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT)) + goto error_vertex_buffer; + + if (!init_mc_buffer(buffer)) + goto error_mc; + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + if (!init_idct_buffer(buffer)) + goto error_idct; + + if (!init_zscan_buffer(buffer)) + goto error_zscan; + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + vl_mpg12_bs_init(&buffer->bs, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT); + + return &buffer->base; + +error_zscan: + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + cleanup_idct_buffer(buffer); + +error_idct: + cleanup_mc_buffer(buffer); + +error_mc: + vl_vb_cleanup(&buffer->vertex_stream); + +error_vertex_buffer: + FREE(buffer); + return NULL; +} + +static void +vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, + unsigned num_ycbcr_blocks[3], + struct pipe_video_buffer *refs[2], + struct pipe_video_buffer *dst) +{ + struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer; + struct vl_mpeg12_decoder *dec; + + struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv; + struct pipe_surface **surfaces; + + struct pipe_vertex_buffer vb[3]; + + unsigned i, j, component; + unsigned nr_components; + + assert(buf); + + dec = (struct vl_mpeg12_decoder *)buf->base.decoder; + assert(dec); + + for (i = 0; i < 2; ++i) + sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL; + + vb[0] = dec->quads; + vb[1] = dec->pos; + + surfaces = dst->get_surfaces(dst); + + dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv); + for (i = 0; i < VL_MAX_PLANES; ++i) { + if (!surfaces[i]) continue; + + vl_mc_set_surface(&buf->mc[i], surfaces[i]); + + for (j = 0; j < VL_MAX_REF_FRAMES; ++j) { + if (!sv[j]) continue; + + vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);; + dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + + vl_mc_render_ref(&buf->mc[i], sv[j][i]); + } + } + + vb[2] = dec->block_num; + + dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr); + for (i = 0; i < VL_MAX_PLANES; ++i) { + if (!num_ycbcr_blocks[i]) continue; + + vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); + dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + + vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]); + } + + mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); + for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) { + if (!surfaces[i]) continue; + + nr_components = util_format_get_nr_components(surfaces[i]->texture->format); + for (j = 0; j < nr_components; ++j, ++component) { + if (!num_ycbcr_blocks[i]) continue; + + vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); + dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + vl_idct_prepare_stage2(&buf->idct[component]); + else { + dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]); + dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr); + } + vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]); + } + } +} + +static bool +init_pipe_state(struct vl_mpeg12_decoder *dec) +{ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_sampler_state sampler; + unsigned i; + + assert(dec); + + memset(&dsa, 0, sizeof dsa); + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + dec->dsa = dec->base.context->create_depth_stencil_alpha_state(dec->base.context, &dsa); + dec->base.context->bind_depth_stencil_alpha_state(dec->base.context, dec->dsa); + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + dec->sampler_ycbcr = dec->base.context->create_sampler_state(dec->base.context, &sampler); + if (!dec->sampler_ycbcr) + return false; + + return true; +} + +static const struct format_config* +find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs) +{ + struct pipe_screen *screen; + unsigned i; + + assert(dec); + + screen = dec->base.context->screen; + + for (i = 0; i < num_configs; ++i) { + if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D, + 1, PIPE_BIND_SAMPLER_VIEW)) + continue; + + if (configs[i].idct_source_format != PIPE_FORMAT_NONE) { + if (!screen->is_format_supported(screen, configs[i].idct_source_format, PIPE_TEXTURE_2D, + 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) + continue; + + if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_3D, + 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) + continue; + } else { + if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_2D, + 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) + continue; + } + return &configs[i]; + } + + return NULL; +} + +static bool +init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config) +{ + unsigned num_channels; + + assert(dec); + + dec->zscan_source_format = format_config->zscan_source_format; + dec->zscan_linear = vl_zscan_layout(dec->base.context, vl_zscan_linear, dec->blocks_per_line); + dec->zscan_normal = vl_zscan_layout(dec->base.context, vl_zscan_normal, dec->blocks_per_line); + dec->zscan_alternate = vl_zscan_layout(dec->base.context, vl_zscan_alternate, dec->blocks_per_line); + + num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1; + + if (!vl_zscan_init(&dec->zscan_y, dec->base.context, dec->base.width, dec->base.height, + dec->blocks_per_line, dec->num_blocks, num_channels)) + return false; + + if (!vl_zscan_init(&dec->zscan_c, dec->base.context, dec->chroma_width, dec->chroma_height, + dec->blocks_per_line, dec->num_blocks, num_channels)) + return false; + + return true; +} + +static bool +init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config) +{ + unsigned nr_of_idct_render_targets, max_inst; + enum pipe_format formats[3]; + + struct pipe_sampler_view *matrix = NULL; + + nr_of_idct_render_targets = dec->base.context->screen->get_param + ( + dec->base.context->screen, PIPE_CAP_MAX_RENDER_TARGETS + ); + + max_inst = dec->base.context->screen->get_shader_param + ( + dec->base.context->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS + ); + + // Just assume we need 32 inst per render target, not 100% true, but should work in most cases + if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4) + // more than 4 render targets usually doesn't makes any seens + nr_of_idct_render_targets = 4; + else + nr_of_idct_render_targets = 1; + + formats[0] = formats[1] = formats[2] = format_config->idct_source_format; + dec->idct_source = vl_video_buffer_create_ex + ( + dec->base.context, dec->base.width / 4, dec->base.height, 1, + dec->base.chroma_format, formats, PIPE_USAGE_STATIC + ); + + if (!dec->idct_source) + goto error_idct_source; + + formats[0] = formats[1] = formats[2] = format_config->mc_source_format; + dec->mc_source = vl_video_buffer_create_ex + ( + dec->base.context, dec->base.width / nr_of_idct_render_targets, + dec->base.height / 4, nr_of_idct_render_targets, + dec->base.chroma_format, formats, PIPE_USAGE_STATIC + ); + + if (!dec->mc_source) + goto error_mc_source; + + if (!(matrix = vl_idct_upload_matrix(dec->base.context, format_config->idct_scale))) + goto error_matrix; + + if (!vl_idct_init(&dec->idct_y, dec->base.context, dec->base.width, dec->base.height, + nr_of_idct_render_targets, matrix, matrix)) + goto error_y; + + if(!vl_idct_init(&dec->idct_c, dec->base.context, dec->chroma_width, dec->chroma_height, + nr_of_idct_render_targets, matrix, matrix)) + goto error_c; + + pipe_sampler_view_reference(&matrix, NULL); + + return true; + +error_c: + vl_idct_cleanup(&dec->idct_y); + +error_y: + pipe_sampler_view_reference(&matrix, NULL); + +error_matrix: + dec->mc_source->destroy(dec->mc_source); + +error_mc_source: + dec->idct_source->destroy(dec->idct_source); + +error_idct_source: + return false; +} + +static bool +init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config) +{ + enum pipe_format formats[3]; + + formats[0] = formats[1] = formats[2] = format_config->mc_source_format; + dec->mc_source = vl_video_buffer_create_ex + ( + dec->base.context, dec->base.width, dec->base.height, 1, + dec->base.chroma_format, formats, PIPE_USAGE_STATIC + ); + + return dec->mc_source != NULL; +} + +static void +mc_vert_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_dst o_vtex; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_vert_shader(idct, shader, first_output, tex); + } else { + o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output); + ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex)); + } +} + +static void +mc_frag_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_src src, sampler; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_frag_shader(idct, shader, first_input, dst); + } else { + src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR); + sampler = ureg_DECL_sampler(shader, 0); + ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler); + } +} + +struct pipe_video_decoder * +vl_create_mpeg12_decoder(struct pipe_context *context, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT; + const struct format_config *format_config; + struct vl_mpeg12_decoder *dec; + + assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); + + dec = CALLOC_STRUCT(vl_mpeg12_decoder); + + if (!dec) + return NULL; + + dec->base.context = context; + dec->base.profile = profile; + dec->base.entrypoint = entrypoint; + dec->base.chroma_format = chroma_format; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = vl_mpeg12_destroy; + dec->base.create_buffer = vl_mpeg12_create_buffer; + dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer; + + dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4); + dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels; + + dec->quads = vl_vb_upload_quads(dec->base.context); + dec->pos = vl_vb_upload_pos( + dec->base.context, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT + ); + dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks); + + dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); + dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + + /* TODO: Implement 422, 444 */ + assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); + + if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { + dec->chroma_width = dec->base.width / 2; + dec->chroma_height = dec->base.height / 2; + } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { + dec->chroma_width = dec->base.width; + dec->chroma_height = dec->base.height / 2; + } else { + dec->chroma_width = dec->base.width; + dec->chroma_height = dec->base.height; + } + + switch (entrypoint) { + case PIPE_VIDEO_ENTRYPOINT_BITSTREAM: + format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs); + break; + + case PIPE_VIDEO_ENTRYPOINT_IDCT: + format_config = find_format_config(dec, idct_format_config, num_idct_format_configs); + break; + + case PIPE_VIDEO_ENTRYPOINT_MC: + format_config = find_format_config(dec, mc_format_config, num_mc_format_configs); + break; + + default: + assert(0); + return NULL; + } + + if (!format_config) + return NULL; + + if (!init_zscan(dec, format_config)) + goto error_zscan; + + if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + if (!init_idct(dec, format_config)) + goto error_sources; + } else { + if (!init_mc_source_widthout_idct(dec, format_config)) + goto error_sources; + } + + if (!vl_mc_init(&dec->mc_y, dec->base.context, dec->base.width, dec->base.height, + MACROBLOCK_HEIGHT, format_config->mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) + goto error_mc_y; + + // TODO + if (!vl_mc_init(&dec->mc_c, dec->base.context, dec->base.width, dec->base.height, + BLOCK_HEIGHT, format_config->mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) + goto error_mc_c; + + if (!init_pipe_state(dec)) + goto error_pipe_state; + + return &dec->base; + +error_pipe_state: + vl_mc_cleanup(&dec->mc_c); + +error_mc_c: + vl_mc_cleanup(&dec->mc_y); + +error_mc_y: + if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + vl_idct_cleanup(&dec->idct_y); + vl_idct_cleanup(&dec->idct_c); + dec->idct_source->destroy(dec->idct_source); + } + dec->mc_source->destroy(dec->mc_source); + +error_sources: + vl_zscan_cleanup(&dec->zscan_y); + vl_zscan_cleanup(&dec->zscan_c); + +error_zscan: + FREE(dec); + return NULL; +} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h new file mode 100644 index 00000000000..01265e368a3 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -0,0 +1,105 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_mpeg12_decoder_h +#define vl_mpeg12_decoder_h + +#include <pipe/p_video_decoder.h> + +#include "vl_mpeg12_bitstream.h" +#include "vl_zscan.h" +#include "vl_idct.h" +#include "vl_mc.h" + +#include "vl_vertex_buffers.h" +#include "vl_video_buffer.h" + +struct pipe_screen; +struct pipe_context; + +struct vl_mpeg12_decoder +{ + struct pipe_video_decoder base; + + unsigned chroma_width, chroma_height; + + unsigned blocks_per_line; + unsigned num_blocks; + + enum pipe_format zscan_source_format; + + struct pipe_vertex_buffer quads; + struct pipe_vertex_buffer pos; + struct pipe_vertex_buffer block_num; + + void *ves_ycbcr; + void *ves_mv; + + void *sampler_ycbcr; + + struct pipe_sampler_view *zscan_linear; + struct pipe_sampler_view *zscan_normal; + struct pipe_sampler_view *zscan_alternate; + + struct pipe_video_buffer *idct_source; + struct pipe_video_buffer *mc_source; + + struct vl_zscan zscan_y, zscan_c; + struct vl_idct idct_y, idct_c; + struct vl_mc mc_y, mc_c; + + void *dsa; +}; + +struct vl_mpeg12_buffer +{ + struct pipe_video_decode_buffer base; + + struct vl_vertex_buffer vertex_stream; + + struct pipe_video_buffer *zscan_source; + + struct vl_mpg12_bs bs; + struct vl_zscan_buffer zscan[VL_MAX_PLANES]; + struct vl_idct_buffer idct[VL_MAX_PLANES]; + struct vl_mc_buffer mc[VL_MAX_PLANES]; + + struct pipe_transfer *tex_transfer[VL_MAX_PLANES]; + short *texels[VL_MAX_PLANES]; +}; + +/** + * creates a shader based mpeg12 decoder + */ +struct pipe_video_decoder * +vl_create_mpeg12_decoder(struct pipe_context *pipe, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); + +#endif /* vl_mpeg12_decoder_h */ diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h new file mode 100644 index 00000000000..27bb69d67bc --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_types.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_types_h +#define vl_types_h + +struct vertex2f +{ + float x, y; +}; + +struct vertex2s +{ + short x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex4s +{ + short x, y, z, w; +}; + +#endif /* vl_types_h */ diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c new file mode 100644 index 00000000000..c0f1449bf80 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -0,0 +1,419 @@ +/************************************************************************** + * + * Copyright 2010 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> +#include <util/u_format.h> +#include "vl_vertex_buffers.h" +#include "vl_types.h" + +/* vertices for a quad covering a block */ +static const struct vertex2f block_quad[4] = { + {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} +}; + +struct pipe_vertex_buffer +vl_vb_upload_quads(struct pipe_context *pipe) +{ + struct pipe_vertex_buffer quad; + struct pipe_transfer *buf_transfer; + struct vertex2f *v; + + unsigned i; + + assert(pipe); + + /* create buffer */ + quad.stride = sizeof(struct vertex2f); + quad.buffer_offset = 0; + quad.buffer = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(struct vertex2f) * 4 + ); + + if(!quad.buffer) + return quad; + + /* and fill it */ + v = pipe_buffer_map + ( + pipe, + quad.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer + ); + + for (i = 0; i < 4; ++i, ++v) { + v->x = block_quad[i].x; + v->y = block_quad[i].y; + } + + pipe_buffer_unmap(pipe, buf_transfer); + + return quad; +} + +struct pipe_vertex_buffer +vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) +{ + struct pipe_vertex_buffer pos; + struct pipe_transfer *buf_transfer; + struct vertex2s *v; + + unsigned x, y; + + assert(pipe); + + /* create buffer */ + pos.stride = sizeof(struct vertex2s); + pos.buffer_offset = 0; + pos.buffer = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(struct vertex2s) * width * height + ); + + if(!pos.buffer) + return pos; + + /* and fill it */ + v = pipe_buffer_map + ( + pipe, + pos.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer + ); + + for ( y = 0; y < height; ++y) { + for ( x = 0; x < width; ++x, ++v) { + v->x = x; + v->y = y; + } + } + + pipe_buffer_unmap(pipe, buf_transfer); + + return pos; +} + +struct pipe_vertex_buffer +vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks) +{ + struct pipe_vertex_buffer buf; + struct pipe_transfer *buf_transfer; + struct vertex2s *v; + unsigned i; + + assert(pipe); + + /* create buffer */ + buf.stride = sizeof(struct vertex2s); + buf.buffer_offset = 0; + buf.buffer = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(struct vertex2s) * num_blocks + ); + + if(!buf.buffer) + return buf; + + /* and fill it */ + v = pipe_buffer_map + ( + pipe, + buf.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer + ); + + for ( i = 0; i < num_blocks; ++i, ++v) { + v->x = i; + v->y = i; + } + + pipe_buffer_unmap(pipe, buf_transfer); + + return buf; +} + +static struct pipe_vertex_element +vl_vb_get_quad_vertex_element(void) +{ + struct pipe_vertex_element element; + + /* setup rectangle element */ + element.src_offset = 0; + element.instance_divisor = 0; + element.vertex_buffer_index = 0; + element.src_format = PIPE_FORMAT_R32G32_FLOAT; + + return element; +} + +static void +vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements, + unsigned vertex_buffer_index) +{ + unsigned i, offset = 0; + + assert(elements && num_elements); + + for ( i = 0; i < num_elements; ++i ) { + elements[i].src_offset = offset; + elements[i].instance_divisor = 1; + elements[i].vertex_buffer_index = vertex_buffer_index; + offset += util_format_get_blocksize(elements[i].src_format); + } +} + +void * +vl_vb_get_ves_ycbcr(struct pipe_context *pipe) +{ + struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; + + assert(pipe); + + memset(&vertex_elems, 0, sizeof(vertex_elems)); + vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + + /* Position element */ + vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; + + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); + + /* block num element */ + vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED; + + vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2); + + return pipe->create_vertex_elements_state(pipe, 3, vertex_elems); +} + +void * +vl_vb_get_ves_mv(struct pipe_context *pipe) +{ + struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; + + assert(pipe); + + memset(&vertex_elems, 0, sizeof(vertex_elems)); + vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + + /* Position element */ + vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; + + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); + + /* motion vector TOP element */ + vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; + + /* motion vector BOTTOM element */ + vertex_elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; + + vl_vb_element_helper(&vertex_elems[VS_I_MV_TOP], 2, 2); + + return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems); +} + +bool +vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, + unsigned width, unsigned height) +{ + unsigned i, size; + + assert(buffer); + + buffer->width = width; + buffer->height = height; + + size = width * height; + + for (i = 0; i < VL_MAX_PLANES; ++i) { + buffer->ycbcr[i].resource = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + sizeof(struct pipe_ycbcr_block) * size * 4 + ); + if (!buffer->ycbcr[i].resource) + goto error_ycbcr; + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + buffer->mv[i].resource = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + sizeof(struct pipe_motionvector) * size + ); + if (!buffer->mv[i].resource) + goto error_mv; + } + + vl_vb_map(buffer, pipe); + return true; + +error_mv: + for (i = 0; i < VL_MAX_PLANES; ++i) + pipe_resource_reference(&buffer->mv[i].resource, NULL); + +error_ycbcr: + for (i = 0; i < VL_MAX_PLANES; ++i) + pipe_resource_reference(&buffer->ycbcr[i].resource, NULL); + return false; +} + +unsigned +vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer) +{ + return 1; +} + +struct pipe_vertex_buffer +vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component) +{ + struct pipe_vertex_buffer buf; + + assert(buffer); + + buf.stride = sizeof(struct pipe_ycbcr_block); + buf.buffer_offset = 0; + buf.buffer = buffer->ycbcr[component].resource; + + return buf; +} + +struct pipe_vertex_buffer +vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector) +{ + struct pipe_vertex_buffer buf; + + assert(buffer); + + buf.stride = sizeof(struct pipe_motionvector); + buf.buffer_offset = 0; + buf.buffer = buffer->mv[motionvector].resource; + + return buf; +} + +void +vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) +{ + unsigned i; + + assert(buffer && pipe); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + buffer->ycbcr[i].vertex_stream = pipe_buffer_map + ( + pipe, + buffer->ycbcr[i].resource, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buffer->ycbcr[i].transfer + ); + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + buffer->mv[i].vertex_stream = pipe_buffer_map + ( + pipe, + buffer->mv[i].resource, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buffer->mv[i].transfer + ); + } + +} + +struct pipe_ycbcr_block * +vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component) +{ + assert(buffer); + assert(component < VL_MAX_PLANES); + + return buffer->ycbcr[component].vertex_stream; +} + +unsigned +vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer) +{ + assert(buffer); + + return buffer->width; +} + +struct pipe_motionvector * +vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame) +{ + assert(buffer); + assert(ref_frame < VL_MAX_REF_FRAMES); + + return buffer->mv[ref_frame].vertex_stream; +} + +void +vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) +{ + unsigned i; + + assert(buffer && pipe); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + pipe_buffer_unmap(pipe, buffer->ycbcr[i].transfer); + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + pipe_buffer_unmap(pipe, buffer->mv[i].transfer); + } +} + +void +vl_vb_cleanup(struct vl_vertex_buffer *buffer) +{ + unsigned i; + + assert(buffer); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + pipe_resource_reference(&buffer->ycbcr[i].resource, NULL); + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + pipe_resource_reference(&buffer->mv[i].resource, NULL); + } +} diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h new file mode 100644 index 00000000000..74845a42b69 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2010 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef vl_vertex_buffers_h +#define vl_vertex_buffers_h + +#include <pipe/p_state.h> +#include <pipe/p_video_state.h> + +#include "vl_defines.h" +#include "vl_types.h" + +/* vertex buffers act as a todo list + * uploading all the usefull informations to video ram + * so a vertex shader can work with them + */ + +/* inputs to the vertex shaders */ +enum VS_INPUT +{ + VS_I_RECT = 0, + VS_I_VPOS = 1, + + VS_I_BLOCK_NUM = 2, + + VS_I_MV_TOP = 2, + VS_I_MV_BOTTOM = 3, + + NUM_VS_INPUTS = 4 +}; + +struct vl_vertex_buffer +{ + unsigned width, height; + + struct { + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct pipe_ycbcr_block *vertex_stream; + } ycbcr[VL_MAX_PLANES]; + + struct { + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct pipe_motionvector *vertex_stream; + } mv[VL_MAX_REF_FRAMES]; +}; + +struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe); + +struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); + +struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks); + +void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe); + +void *vl_vb_get_ves_mv(struct pipe_context *pipe); + +bool vl_vb_init(struct vl_vertex_buffer *buffer, + struct pipe_context *pipe, + unsigned width, unsigned height); + +unsigned vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer); + +void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); + +struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component); + +struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component); + +struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame); + +unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer); + +struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame); + +void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); + +void vl_vb_cleanup(struct vl_vertex_buffer *buffer); + +#endif /* vl_vertex_buffers_h */ diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c new file mode 100644 index 00000000000..4d8b6649dd2 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_video_buffer.c @@ -0,0 +1,340 @@ +/************************************************************************** + * + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_screen.h> +#include <pipe/p_context.h> +#include <pipe/p_state.h> + +#include <util/u_format.h> +#include <util/u_inlines.h> +#include <util/u_sampler.h> +#include <util/u_memory.h> + +#include "vl_video_buffer.h" + +const enum pipe_format const_resource_formats_YV12[3] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8_UNORM +}; + +const enum pipe_format const_resource_formats_NV12[3] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_NONE +}; + +const enum pipe_format * +vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format) +{ + switch(format) { + case PIPE_FORMAT_YV12: + return const_resource_formats_YV12; + + case PIPE_FORMAT_NV12: + return const_resource_formats_NV12; + + default: + return NULL; + } +} + +boolean +vl_video_buffer_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile) +{ + const enum pipe_format *resource_formats; + unsigned i; + + resource_formats = vl_video_buffer_formats(screen, format); + if (!resource_formats) + return false; + + for(i = 0; i < VL_MAX_PLANES; ++i) { + if (!resource_formats[i]) + continue; + + if (!screen->is_format_supported(screen, resource_formats[i], PIPE_TEXTURE_2D, 0, PIPE_USAGE_STATIC)) + return false; + } + + return true; +} + +unsigned +vl_video_buffer_max_size(struct pipe_screen *screen) +{ + uint32_t max_2d_texture_level; + + max_2d_texture_level = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + + return 1 << (max_2d_texture_level-1); +} + +static void +vl_video_buffer_destroy(struct pipe_video_buffer *buffer) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; + unsigned i; + + assert(buf); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + pipe_surface_reference(&buf->surfaces[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); + pipe_resource_reference(&buf->resources[i], NULL); + } + + FREE(buffer); +} + +static struct pipe_sampler_view ** +vl_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; + struct pipe_sampler_view sv_templ; + struct pipe_context *pipe; + unsigned i; + + assert(buf); + + pipe = buf->base.context; + + for (i = 0; i < buf->num_planes; ++i ) { + if (!buf->sampler_view_planes[i]) { + memset(&sv_templ, 0, sizeof(sv_templ)); + u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format); + + if (util_format_get_nr_components(buf->resources[i]->format) == 1) + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = sv_templ.swizzle_a = PIPE_SWIZZLE_RED; + + buf->sampler_view_planes[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ); + if (!buf->sampler_view_planes[i]) + goto error; + } + } + + return buf->sampler_view_planes; + +error: + for (i = 0; i < buf->num_planes; ++i ) + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + + return NULL; +} + +static struct pipe_sampler_view ** +vl_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; + struct pipe_sampler_view sv_templ; + struct pipe_context *pipe; + unsigned i, j, component; + + assert(buf); + + pipe = buf->base.context; + + for (component = 0, i = 0; i < buf->num_planes; ++i ) { + unsigned nr_components = util_format_get_nr_components(buf->resources[i]->format); + + for (j = 0; j < nr_components; ++j, ++component) { + assert(component < VL_MAX_PLANES); + + if (!buf->sampler_view_components[component]) { + memset(&sv_templ, 0, sizeof(sv_templ)); + u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format); + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_RED + j; + sv_templ.swizzle_a = PIPE_SWIZZLE_ONE; + buf->sampler_view_components[component] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ); + if (!buf->sampler_view_components[component]) + goto error; + } + } + } + + return buf->sampler_view_components; + +error: + for (i = 0; i < VL_MAX_PLANES; ++i ) + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); + + return NULL; +} + +static struct pipe_surface ** +vl_video_buffer_surfaces(struct pipe_video_buffer *buffer) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; + struct pipe_surface surf_templ; + struct pipe_context *pipe; + unsigned i; + + assert(buf); + + pipe = buf->base.context; + + for (i = 0; i < buf->num_planes; ++i ) { + if (!buf->surfaces[i]) { + memset(&surf_templ, 0, sizeof(surf_templ)); + surf_templ.format = buf->resources[i]->format; + surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + buf->surfaces[i] = pipe->create_surface(pipe, buf->resources[i], &surf_templ); + if (!buf->surfaces[i]) + goto error; + } + } + + return buf->surfaces; + +error: + for (i = 0; i < buf->num_planes; ++i ) + pipe_surface_reference(&buf->surfaces[i], NULL); + + return NULL; +} + +struct pipe_video_buffer * +vl_video_buffer_create(struct pipe_context *pipe, + enum pipe_format buffer_format, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + const enum pipe_format *resource_formats; + struct pipe_video_buffer *result; + unsigned buffer_width, buffer_height; + bool pot_buffers; + + assert(pipe); + assert(width > 0 && height > 0); + + pot_buffers = !pipe->screen->get_video_param + ( + pipe->screen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_CAP_NPOT_TEXTURES + ); + + resource_formats = vl_video_buffer_formats(pipe->screen, buffer_format); + if (!resource_formats) + return NULL; + + buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH); + buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT); + + result = vl_video_buffer_create_ex + ( + pipe, buffer_width, buffer_height, 1, + chroma_format, resource_formats, PIPE_USAGE_STATIC + ); + if (result) + result->buffer_format = buffer_format; + + return result; +} + +struct pipe_video_buffer * +vl_video_buffer_create_ex(struct pipe_context *pipe, + unsigned width, unsigned height, unsigned depth, + enum pipe_video_chroma_format chroma_format, + const enum pipe_format resource_formats[VL_MAX_PLANES], + unsigned usage) +{ + struct vl_video_buffer *buffer; + struct pipe_resource templ; + unsigned i; + + assert(pipe); + + buffer = CALLOC_STRUCT(vl_video_buffer); + + buffer->base.context = pipe; + buffer->base.destroy = vl_video_buffer_destroy; + buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes; + buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components; + buffer->base.get_surfaces = vl_video_buffer_surfaces; + buffer->base.chroma_format = chroma_format; + buffer->base.width = width; + buffer->base.height = height; + buffer->num_planes = 1; + + memset(&templ, 0, sizeof(templ)); + templ.target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D; + templ.format = resource_formats[0]; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = depth; + templ.array_size = 1; + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + templ.usage = usage; + + buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[0]) + goto error; + + if (resource_formats[1] == PIPE_FORMAT_NONE) { + assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444); + assert(resource_formats[2] == PIPE_FORMAT_NONE); + return &buffer->base; + } else + buffer->num_planes = 2; + + templ.format = resource_formats[1]; + if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { + templ.width0 /= 2; + templ.height0 /= 2; + } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { + templ.height0 /= 2; + } + + buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[1]) + goto error; + + if (resource_formats[2] == PIPE_FORMAT_NONE) + return &buffer->base; + else + buffer->num_planes = 3; + + templ.format = resource_formats[2]; + buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[2]) + goto error; + + return &buffer->base; + +error: + for (i = 0; i < VL_MAX_PLANES; ++i) + pipe_resource_reference(&buffer->resources[i], NULL); + FREE(buffer); + + return NULL; +} diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h new file mode 100644 index 00000000000..291d15c1e9d --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_video_buffer.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_ycbcr_buffer_h +#define vl_ycbcr_buffer_h + +#include <pipe/p_context.h> +#include <pipe/p_video_decoder.h> + +#include "vl_defines.h" + +/** + * implementation of a planar ycbcr buffer + */ + +/* planar buffer for vl data upload and manipulation */ +struct vl_video_buffer +{ + struct pipe_video_buffer base; + unsigned num_planes; + struct pipe_resource *resources[VL_MAX_PLANES]; + struct pipe_sampler_view *sampler_view_planes[VL_MAX_PLANES]; + struct pipe_sampler_view *sampler_view_components[VL_MAX_PLANES]; + struct pipe_surface *surfaces[VL_MAX_PLANES]; +}; + +/** + * get subformats for each plane + */ +const enum pipe_format * +vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format); + +/** + * get maximum size of video buffers + */ +unsigned +vl_video_buffer_max_size(struct pipe_screen *screen); + +/** + * check if video buffer format is supported for a codec/profile + * can be used as default implementation of screen->is_video_format_supported + */ +boolean +vl_video_buffer_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile); + +/** + * creates a video buffer, can be used as a standard implementation for pipe->create_video_buffer + */ +struct pipe_video_buffer * +vl_video_buffer_create(struct pipe_context *pipe, + enum pipe_format buffer_format, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); + +/** + * extended create function, gets depth, usage and formats for each plane seperately + */ +struct pipe_video_buffer * +vl_video_buffer_create_ex(struct pipe_context *pipe, + unsigned width, unsigned height, unsigned depth, + enum pipe_video_chroma_format chroma_format, + const enum pipe_format resource_formats[VL_MAX_PLANES], + unsigned usage); + +#endif /* vl_ycbcr_buffer_h */ diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h new file mode 100644 index 00000000000..8c5b3aca47d --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -0,0 +1,138 @@ +/************************************************************************** + * + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * This file is based uppon slice_xvmc.c and vlc.h from the xine project, + * which in turn is based on mpeg2dec. The following is the original copyright: + * + * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> + * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef vl_vlc_h +#define vl_vlc_h + +struct vl_vlc +{ + uint32_t buf; /* current 32 bit working set of buffer */ + int bits; /* used bits in working set */ + const uint8_t *ptr; /* buffer with stream data */ + const uint8_t *max; /* ptr+len of buffer */ +}; + +static inline void +vl_vlc_restart(struct vl_vlc *vlc) +{ + vlc->buf = (vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3]; + vlc->bits = -16; + vlc->ptr += 4; +} + +static inline void +vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len) +{ + vlc->ptr = data; + vlc->max = data + len; + vl_vlc_restart(vlc); +} + +static inline bool +vl_vlc_getbyte(struct vl_vlc *vlc) +{ + vlc->buf <<= 8; + vlc->buf |= vlc->ptr[0]; + vlc->ptr++; + return vlc->ptr < vlc->max; +} + +#define vl_vlc_getword(vlc, shift) \ +do { \ + (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift); \ + (vlc)->ptr += 2; \ +} while (0) + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define vl_vlc_needbits(vlc) \ +do { \ + if ((vlc)->bits >= 0) { \ + vl_vlc_getword(vlc, (vlc)->bits); \ + (vlc)->bits -= 16; \ + } \ +} while (0) + +/* make sure that the full 32 bit of the buffer are valid */ +static inline void +vl_vlc_need32bits(struct vl_vlc *vlc) +{ + vl_vlc_needbits(vlc); + if (vlc->bits > -8) { + unsigned n = -vlc->bits; + vlc->buf <<= n; + vlc->buf |= *vlc->ptr << 8; + vlc->bits = -8; + vlc->ptr++; + } + if (vlc->bits > -16) { + unsigned n = -vlc->bits - 8; + vlc->buf <<= n; + vlc->buf |= *vlc->ptr; + vlc->bits = -16; + vlc->ptr++; + } +} + +/* remove num valid bits from bit_buf */ +#define vl_vlc_dumpbits(vlc, num) \ +do { \ + (vlc)->buf <<= (num); \ + (vlc)->bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num))) + +#endif /* vl_vlc_h */ diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c new file mode 100644 index 00000000000..58cee0070d8 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -0,0 +1,604 @@ +/************************************************************************** + * + * Copyright 2011 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_screen.h> +#include <pipe/p_context.h> + +#include <util/u_draw.h> +#include <util/u_sampler.h> +#include <util/u_inlines.h> + +#include <tgsi/tgsi_ureg.h> + +#include <vl/vl_defines.h> +#include <vl/vl_types.h> + +#include "vl_zscan.h" +#include "vl_vertex_buffers.h" + +enum VS_OUTPUT +{ + VS_O_VPOS, + VS_O_VTEX +}; + +const int vl_zscan_linear[] = +{ + /* Linear scan pattern */ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23, + 24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39, + 40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55, + 56,57,58,59,60,61,62,63 +}; + +const int vl_zscan_normal[] = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +const int vl_zscan_alternate[] = +{ + /* Alternate scan pattern */ + 0, 8,16,24, 1, 9, 2,10, + 17,25,32,40,48,56,57,49, + 41,33,26,18, 3,11, 4,12, + 19,27,34,42,50,58,35,43, + 51,59,20,28, 5,13, 6,14, + 21,29,36,44,52,60,37,45, + 53,61,22,30, 7,15,23,31, + 38,46,54,62,39,47,55,63 +}; + +static void * +create_vert_shader(struct vl_zscan *zscan) +{ + struct ureg_program *shader; + + struct ureg_src scale; + struct ureg_src vrect, vpos, block_num; + + struct ureg_dst tmp; + struct ureg_dst o_vpos, o_vtex[zscan->num_channels]; + + signed i; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / zscan->buffer_width, + (float)BLOCK_HEIGHT / zscan->buffer_height); + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); + + tmp = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + + for (i = 0; i < zscan->num_channels; ++i) + o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); + + /* + * o_vpos.xy = (vpos + vrect) * scale + * o_vpos.zw = 1.0f + * + * tmp.xy = InstanceID / blocks_per_line + * tmp.x = frac(tmp.x) + * tmp.y = floor(tmp.y) + * + * o_vtex.x = vrect.x / blocks_per_line + tmp.x + * o_vtex.y = vrect.y + * o_vtex.z = tmp.z * blocks_per_line / blocks_total + */ + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), + ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); + + ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); + ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); + + for (i = 0; i < zscan->num_channels; ++i) { + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), + ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2))); + + ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, + ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); + ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); + ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); + ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), + ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); + } + + ureg_release_temporary(shader, tmp); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, zscan->pipe); +} + +static void * +create_frag_shader(struct vl_zscan *zscan) +{ + struct ureg_program *shader; + struct ureg_src vtex[zscan->num_channels]; + + struct ureg_src samp_src, samp_scan, samp_quant; + + struct ureg_dst tmp[zscan->num_channels]; + struct ureg_dst quant, fragment; + + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + for (i = 0; i < zscan->num_channels; ++i) + vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); + + samp_src = ureg_DECL_sampler(shader, 0); + samp_scan = ureg_DECL_sampler(shader, 1); + samp_quant = ureg_DECL_sampler(shader, 2); + + for (i = 0; i < zscan->num_channels; ++i) + tmp[i] = ureg_DECL_temporary(shader); + quant = ureg_DECL_temporary(shader); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + /* + * tmp.x = tex(vtex, 1) + * tmp.y = vtex.z + * fragment = tex(tmp, 0) * quant + */ + for (i = 0; i < zscan->num_channels; ++i) + ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); + + for (i = 0; i < zscan->num_channels; ++i) + ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); + + for (i = 0; i < zscan->num_channels; ++i) { + ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); + ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); + } + + ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); + ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); + + for (i = 0; i < zscan->num_channels; ++i) + ureg_release_temporary(shader, tmp[i]); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, zscan->pipe); +} + +static bool +init_shaders(struct vl_zscan *zscan) +{ + assert(zscan); + + zscan->vs = create_vert_shader(zscan); + if (!zscan->vs) + goto error_vs; + + zscan->fs = create_frag_shader(zscan); + if (!zscan->fs) + goto error_fs; + + return true; + +error_fs: + zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); + +error_vs: + return false; +} + +static void +cleanup_shaders(struct vl_zscan *zscan) +{ + assert(zscan); + + zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); + zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs); +} + +static bool +init_state(struct vl_zscan *zscan) +{ + struct pipe_blend_state blend; + struct pipe_rasterizer_state rs_state; + struct pipe_sampler_state sampler; + unsigned i; + + assert(zscan); + + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.gl_rasterization_rules = true; + zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); + if (!zscan->rs_state) + goto error_rs_state; + + memset(&blend, 0, sizeof blend); + + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 0; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.dither = 0; + zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend); + if (!zscan->blend) + goto error_blend; + + for (i = 0; i < 3; ++i) { + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler); + if (!zscan->samplers[i]) + goto error_samplers; + } + + return true; + +error_samplers: + for (i = 0; i < 2; ++i) + if (zscan->samplers[i]) + zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); + + zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); + +error_blend: + zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); + +error_rs_state: + return false; +} + +static void +cleanup_state(struct vl_zscan *zscan) +{ + unsigned i; + + assert(zscan); + + for (i = 0; i < 3; ++i) + zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); + + zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); + zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); +} + +struct pipe_sampler_view * +vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line) +{ + const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + + int patched_layout[64]; + + struct pipe_resource res_tmpl, *res; + struct pipe_sampler_view sv_tmpl, *sv; + struct pipe_transfer *buf_transfer; + unsigned x, y, i, pitch; + float *f; + + struct pipe_box rect = + { + 0, 0, 0, + BLOCK_WIDTH * blocks_per_line, + BLOCK_HEIGHT, + 1 + }; + + assert(pipe && layout && blocks_per_line); + + for (i = 0; i < 64; ++i) + patched_layout[layout[i]] = i; + + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_2D; + res_tmpl.format = PIPE_FORMAT_R32_FLOAT; + res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line; + res_tmpl.height0 = BLOCK_HEIGHT; + res_tmpl.depth0 = 1; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_IMMUTABLE; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = pipe->screen->resource_create(pipe->screen, &res_tmpl); + if (!res) + goto error_resource; + + buf_transfer = pipe->get_transfer + ( + pipe, res, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + if (!buf_transfer) + goto error_transfer; + + pitch = buf_transfer->stride / sizeof(float); + + f = pipe->transfer_map(pipe, buf_transfer); + if (!f) + goto error_map; + + for (i = 0; i < blocks_per_line; ++i) + for (y = 0; y < BLOCK_HEIGHT; ++y) + for (x = 0; x < BLOCK_WIDTH; ++x) { + float addr = patched_layout[x + y * BLOCK_WIDTH] + + i * BLOCK_WIDTH * BLOCK_HEIGHT; + + addr /= total_size; + + f[i * BLOCK_WIDTH + y * pitch + x] = addr; + } + + pipe->transfer_unmap(pipe, buf_transfer); + pipe->transfer_destroy(pipe, buf_transfer); + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv = pipe->create_sampler_view(pipe, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!sv) + goto error_map; + + return sv; + +error_map: + pipe->transfer_destroy(pipe, buf_transfer); + +error_transfer: + pipe_resource_reference(&res, NULL); + +error_resource: + return NULL; +} + +bool +vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned blocks_per_line, unsigned blocks_total, + unsigned num_channels) +{ + assert(zscan && pipe); + + zscan->pipe = pipe; + zscan->buffer_width = buffer_width; + zscan->buffer_height = buffer_height; + zscan->num_channels = num_channels; + zscan->blocks_per_line = blocks_per_line; + zscan->blocks_total = blocks_total; + + if(!init_shaders(zscan)) + return false; + + if(!init_state(zscan)) { + cleanup_shaders(zscan); + return false; + } + + return true; +} + +void +vl_zscan_cleanup(struct vl_zscan *zscan) +{ + assert(zscan); + + cleanup_shaders(zscan); + cleanup_state(zscan); +} + +bool +vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, + struct pipe_sampler_view *src, struct pipe_surface *dst) +{ + struct pipe_resource res_tmpl, *res; + struct pipe_sampler_view sv_tmpl; + + assert(zscan && buffer); + + memset(buffer, 0, sizeof(struct vl_zscan_buffer)); + + buffer->zscan = zscan; + + pipe_sampler_view_reference(&buffer->src, src); + + buffer->viewport.scale[0] = dst->width; + buffer->viewport.scale[1] = dst->height; + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + buffer->viewport.translate[0] = 0; + buffer->viewport.translate[1] = 0; + buffer->viewport.translate[2] = 0; + buffer->viewport.translate[3] = 0; + + buffer->fb_state.width = dst->width; + buffer->fb_state.height = dst->height; + buffer->fb_state.nr_cbufs = 1; + pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); + + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_3D; + res_tmpl.format = PIPE_FORMAT_R8_UNORM; + res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line; + res_tmpl.height0 = BLOCK_HEIGHT; + res_tmpl.depth0 = 2; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_IMMUTABLE; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl); + if (!res) + return false; + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X; + buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->quant) + return false; + + return true; +} + +void +vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer) +{ + assert(buffer); + + pipe_sampler_view_reference(&buffer->src, NULL); + pipe_sampler_view_reference(&buffer->layout, NULL); + pipe_sampler_view_reference(&buffer->quant, NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL); +} + +void +vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout) +{ + assert(buffer); + assert(layout); + + pipe_sampler_view_reference(&buffer->layout, layout); +} + +void +vl_zscan_upload_quant(struct vl_zscan_buffer *buffer, + const uint8_t intra_matrix[64], + const uint8_t non_intra_matrix[64]) +{ + struct pipe_context *pipe; + struct pipe_transfer *buf_transfer; + unsigned x, y, i, pitch; + uint8_t *intra, *non_intra; + + struct pipe_box rect = + { + 0, 0, 0, + BLOCK_WIDTH, + BLOCK_HEIGHT, + 2 + }; + + assert(buffer); + assert(intra_matrix); + assert(non_intra_matrix); + + pipe = buffer->zscan->pipe; + + rect.width *= buffer->zscan->blocks_per_line; + + buf_transfer = pipe->get_transfer + ( + pipe, buffer->quant->texture, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + if (!buf_transfer) + goto error_transfer; + + pitch = buf_transfer->stride; + + non_intra = pipe->transfer_map(pipe, buf_transfer); + if (!non_intra) + goto error_map; + + intra = non_intra + BLOCK_HEIGHT * pitch; + + for (i = 0; i < buffer->zscan->blocks_per_line; ++i) + for (y = 0; y < BLOCK_HEIGHT; ++y) + for (x = 0; x < BLOCK_WIDTH; ++x) { + intra[i * BLOCK_WIDTH + y * pitch + x] = intra_matrix[x + y * BLOCK_WIDTH]; + non_intra[i * BLOCK_WIDTH + y * pitch + x] = non_intra_matrix[x + y * BLOCK_WIDTH]; + } + + pipe->transfer_unmap(pipe, buf_transfer); + +error_map: + pipe->transfer_destroy(pipe, buf_transfer); + +error_transfer: + return; +} + +void +vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) +{ + struct vl_zscan *zscan; + + assert(buffer); + + zscan = buffer->zscan; + + zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); + zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); + zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers); + zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); + zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport); + zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, &buffer->src); + zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); + zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); + util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h new file mode 100644 index 00000000000..be12b8e873a --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_zscan.h @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2011 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_zscan_h +#define vl_zscan_h + +#include <pipe/p_compiler.h> +#include <pipe/p_state.h> + +/* + * shader based zscan and quantification + * expect usage of vl_vertex_buffers as a todo list + */ +struct vl_zscan +{ + struct pipe_context *pipe; + + unsigned buffer_width; + unsigned buffer_height; + + unsigned num_channels; + + unsigned blocks_per_line; + unsigned blocks_total; + + void *rs_state; + void *blend; + + void *samplers[3]; + + void *vs, *fs; +}; + +struct vl_zscan_buffer +{ + struct vl_zscan *zscan; + + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; + + struct pipe_sampler_view *src, *layout, *quant; + struct pipe_surface *dst; +}; + +extern const int vl_zscan_linear[]; +extern const int vl_zscan_normal[]; +extern const int vl_zscan_alternate[]; + +struct pipe_sampler_view * +vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line); + +bool +vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned blocks_per_line, unsigned blocks_total, + unsigned num_channels); + +void +vl_zscan_cleanup(struct vl_zscan *zscan); + +bool +vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, + struct pipe_sampler_view *src, struct pipe_surface *dst); + +void +vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer); + +void +vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout); + +void +vl_zscan_upload_quant(struct vl_zscan_buffer *buffer, + const uint8_t intra_matrix[64], + const uint8_t non_intra_matrix[64]); + +void +vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances); + +#endif |