From ca79aeb91e914ac1a4774d51ca49911406377407 Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 17 Apr 2011 12:15:14 +0200 Subject: [g3dvl] rename vl_mpeg12_mc_renderer into vl_mc It's still not 100% free from mpeg12 specific stuff, but should now be a good start for other codecs. --- src/gallium/auxiliary/vl/vl_mc.h | 85 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/gallium/auxiliary/vl/vl_mc.h (limited to 'src/gallium/auxiliary/vl/vl_mc.h') diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h new file mode 100644 index 00000000000..e5b16b5b9da --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_mc_h +#define vl_mc_h + +#include +#include + +#include "vl_types.h" + +struct pipe_context; + +struct vl_mc +{ + struct pipe_context *pipe; + unsigned buffer_width; + unsigned buffer_height; + unsigned macroblock_size; + + void *rs_state; + + void *blend_clear, *blend_add; + void *vs_ref, *vs_ycbcr; + void *fs_ref, *fs_ycbcr; + void *sampler_ref, *sampler_ycbcr; +}; + +struct vl_mc_buffer +{ + struct vl_mc *renderer; + + bool surface_cleared; + + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; + + struct pipe_sampler_view *source; +}; + +bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, + unsigned picture_width, unsigned picture_height, + unsigned macroblock_size, float scale); + +void vl_mc_cleanup(struct vl_mc *renderer); + +bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer, + struct pipe_sampler_view *source); + +void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer); + +void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface); + +void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref, + unsigned not_empty_start_instance, unsigned not_empty_num_instances, + unsigned empty_start_instance, unsigned empty_num_instances); + +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, + unsigned not_empty_start_instance, unsigned not_empty_num_instances); + +#endif /* vl_mc_h */ -- cgit v1.2.3 From b8a6e0e6fc451096d684a1e18529ab4879cdba0a Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 17 Apr 2011 18:53:22 +0200 Subject: [g3dvl] give mv their own vertex buffer back --- src/gallium/auxiliary/vl/vl_defines.h | 3 + src/gallium/auxiliary/vl/vl_mc.c | 25 +-- src/gallium/auxiliary/vl/vl_mc.h | 7 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 54 +++--- src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 10 +- src/gallium/auxiliary/vl/vl_vertex_buffers.c | 230 +++++++++++++++++-------- src/gallium/auxiliary/vl/vl_vertex_buffers.h | 32 ++-- src/gallium/auxiliary/vl/vl_video_buffer.h | 2 +- src/gallium/include/pipe/p_video_state.h | 14 +- src/gallium/state_trackers/xorg/xvmc/surface.c | 20 +-- 10 files changed, 246 insertions(+), 151 deletions(-) (limited to 'src/gallium/auxiliary/vl/vl_mc.h') diff --git a/src/gallium/auxiliary/vl/vl_defines.h b/src/gallium/auxiliary/vl/vl_defines.h index 668991f904f..7568db027e6 100644 --- a/src/gallium/auxiliary/vl/vl_defines.h +++ b/src/gallium/auxiliary/vl/vl_defines.h @@ -35,4 +35,7 @@ #define BLOCK_WIDTH 8 #define BLOCK_HEIGHT 8 +#define VL_MAX_PLANES 3 +#define VL_MAX_REF_FRAMES 2 + #endif diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index 707a4a27077..2624d0502c1 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -612,17 +612,12 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer) } void -vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref, - unsigned not_empty_start_instance, unsigned not_empty_num_instances, - unsigned empty_start_instance, unsigned empty_num_instances) +vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) { struct vl_mc *renderer; assert(buffer && ref); - if (not_empty_num_instances == 0 && empty_num_instances == 0) - return; - prepare_pipe_4_rendering(buffer); renderer = buffer->renderer; @@ -633,24 +628,19 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref, renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref); renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref); - if (not_empty_num_instances > 0) - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, - not_empty_start_instance, not_empty_num_instances); - - if (empty_num_instances > 0) - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, - empty_start_instance, empty_num_instances); + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, + renderer->buffer_width / MACROBLOCK_WIDTH * + renderer->buffer_height / MACROBLOCK_HEIGHT); } void -vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, - unsigned not_empty_start_instance, unsigned not_empty_num_instances) +vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances) { struct vl_mc *renderer; assert(buffer); - if (not_empty_num_instances == 0) + if (num_instances == 0) return; prepare_pipe_4_rendering(buffer); @@ -663,6 +653,5 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source); renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr); - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, - not_empty_start_instance, not_empty_num_instances); + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); } diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h index e5b16b5b9da..bc2b0e7f149 100644 --- a/src/gallium/auxiliary/vl/vl_mc.h +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -75,11 +75,8 @@ void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer); void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface); -void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref, - unsigned not_empty_start_instance, unsigned not_empty_num_instances, - unsigned empty_start_instance, unsigned empty_num_instances); +void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref); -void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, - unsigned not_empty_start_instance, unsigned not_empty_num_instances); +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances); #endif /* vl_mc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 6d10cd16e32..1d8f0f92c37 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -293,6 +293,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv[i]); pipe_resource_reference(&dec->quads.buffer, NULL); + pipe_resource_reference(&dec->pos.buffer, NULL); FREE(dec); } @@ -389,15 +390,9 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks; buffer->base.unmap = vl_mpeg12_buffer_unmap; - buffer->vertex_bufs.individual.quad.stride = dec->quads.stride; - buffer->vertex_bufs.individual.quad.buffer_offset = dec->quads.buffer_offset; - pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, dec->quads.buffer); - - buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, dec->pipe, - dec->base.width / MACROBLOCK_WIDTH * - dec->base.height / MACROBLOCK_HEIGHT); - if (!buffer->vertex_bufs.individual.stream.buffer) - goto error_vertex_stream; + vl_vb_init(&buffer->vertex_stream, dec->pipe, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT); formats[0] = formats[1] = formats[2] =dec->mc_source_format; buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, @@ -461,7 +456,9 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, struct pipe_sampler_view **sv[2]; struct pipe_surface **surfaces; - unsigned ne_start, ne_num, e_start, e_num; + struct pipe_vertex_buffer vb[3]; + + unsigned num_instances; unsigned i, j; assert(buf); @@ -474,9 +471,10 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, surfaces = dst->get_surfaces(dst); - vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num); + num_instances = vl_vb_restart(&buf->vertex_stream); - dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all); + vb[0] = dec->quads; + vb[1] = dec->pos; for (i = 0; i < VL_MAX_PLANES; ++i) { vl_mc_set_surface(&buf->mc[i], surfaces[i]); @@ -484,18 +482,25 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, for (j = 0; j < 2; ++j) { if (sv[j] == NULL) continue; + vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);; + dec->pipe->set_vertex_buffers(dec->pipe, 3, vb); + dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv[j]); - vl_mc_render_ref(&buf->mc[i], sv[j][i], ne_start, ne_num, e_start, e_num); + vl_mc_render_ref(&buf->mc[i], sv[j][i]); } + } - dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]); + vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream); + dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); + for (i = 0; i < VL_MAX_PLANES; ++i) { + dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num); - - vl_mc_render_ycbcr(&buf->mc[i], ne_start, ne_num); + vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances); + vl_mc_render_ycbcr(&buf->mc[i], num_instances); } + dec->pipe->flush(dec->pipe, fence); } @@ -503,11 +508,10 @@ static void vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer) { struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer; - unsigned ne_start, ne_num, e_start, e_num; assert(buf); - vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num); + vl_vb_restart(&buf->vertex_stream); } static bool @@ -691,18 +695,24 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer; dec->base.clear_buffer = vl_mpeg12_decoder_clear_buffer; + dec->base.width = align(width, MACROBLOCK_WIDTH); + dec->base.height = align(height, MACROBLOCK_HEIGHT); + dec->pipe = pipe; dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2); + dec->pos = vl_vb_upload_pos( + dec->pipe, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT + ); + for (i = 0; i < VL_MAX_PLANES; ++i) dec->ves_eb[i] = vl_vb_get_ves_eb(dec->pipe, i); for (i = 0; i < 2; ++i) dec->ves_mv[i] = vl_vb_get_ves_mv(dec->pipe, i); - dec->base.width = align(width, MACROBLOCK_WIDTH); - dec->base.height = align(height, MACROBLOCK_HEIGHT); - /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); dec->empty_block_mask = &const_empty_block_mask_420; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index ffb9e56a13c..2bd5591b463 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -52,6 +52,8 @@ struct vl_mpeg12_decoder enum pipe_format mc_source_format; struct pipe_vertex_buffer quads; + struct pipe_vertex_buffer pos; + void *ves_eb[VL_MAX_PLANES]; void *ves_mv[2]; @@ -71,14 +73,6 @@ struct vl_mpeg12_buffer struct pipe_video_buffer *idct_intermediate; struct pipe_video_buffer *mc_source; - union - { - struct pipe_vertex_buffer all[2]; - struct { - struct pipe_vertex_buffer quad, stream; - } individual; - } vertex_bufs; - struct vl_idct_buffer idct[VL_MAX_PLANES]; struct vl_mc_buffer mc[VL_MAX_PLANES]; diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index 84dfc9eccf6..c923686b06e 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -37,7 +37,11 @@ struct vl_vertex_stream uint8_t dct_type_field; uint8_t dummy[2]; uint8_t eb[3][2][2]; - struct vertex4s mv[4]; +}; + +struct vl_mv_vertex_stream +{ + struct vertex4s mv[2]; }; /* vertices for a quad covering a block */ @@ -96,6 +100,52 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks return quad; } +struct pipe_vertex_buffer +vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) +{ + struct pipe_vertex_buffer pos; + struct pipe_transfer *buf_transfer; + struct vertex2s *v; + + unsigned x, y; + + assert(pipe); + + /* create buffer */ + pos.stride = sizeof(struct vertex2s); + pos.buffer_offset = 0; + pos.buffer = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(struct vertex2s) * width * height + ); + + if(!pos.buffer) + return pos; + + /* and fill it */ + v = pipe_buffer_map + ( + pipe, + pos.buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buf_transfer + ); + + for ( y = 0; y < height; ++y) { + for ( x = 0; x < width; ++x, ++v) { + v->x = x; + v->y = y; + } + } + + pipe_buffer_unmap(pipe, buf_transfer); + + return pos; +} + static struct pipe_vertex_element vl_vb_get_quad_vertex_element(void) { @@ -159,50 +209,83 @@ vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector) { struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; + assert(pipe); + memset(&vertex_elems, 0, sizeof(vertex_elems)); vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); - assert(pipe); - /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); + /* motion vector TOP element */ - vertex_elems[VS_I_MV_TOP].src_offset = offsetof(struct vl_vertex_stream, mv[motionvector * 2]); vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; /* motion vector BOTTOM element */ vertex_elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1); + vl_vb_element_helper(&vertex_elems[VS_I_MV_TOP], 2, 2); return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems); } -struct pipe_vertex_buffer -vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned size) +void +vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned width, unsigned height) { - struct pipe_vertex_buffer buf; + unsigned i; assert(buffer); - buffer->size = size; - buffer->num_not_empty = 0; - buffer->num_empty = 0; + buffer->width = width; + buffer->height = height; + buffer->num_instances = 0; - buf.stride = sizeof(struct vl_vertex_stream); - buf.buffer_offset = 0; - buf.buffer = pipe_buffer_create + buffer->resource = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct vl_vertex_stream) * size + sizeof(struct vl_vertex_stream) * width * height ); - pipe_resource_reference(&buffer->resource, buf.buffer); + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + buffer->mv[i].resource = pipe_buffer_create + ( + pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + sizeof(struct vl_mv_vertex_stream) * width * height + ); + } vl_vb_map(buffer, pipe); +} + +struct pipe_vertex_buffer +vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer) +{ + struct pipe_vertex_buffer buf; + + assert(buffer); + + buf.stride = sizeof(struct vl_vertex_stream); + buf.buffer_offset = 0; + buf.buffer = buffer->resource; + + return buf; +} + +struct pipe_vertex_buffer +vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector) +{ + struct pipe_vertex_buffer buf; + + assert(buffer); + + buf.stride = sizeof(struct vl_mv_vertex_stream); + buf.buffer_offset = 0; + buf.buffer = buffer->mv[motionvector].resource; return buf; } @@ -210,110 +293,119 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) { + unsigned i; + assert(buffer && pipe); - buffer->start = pipe_buffer_map + buffer->buffer = pipe_buffer_map ( pipe, buffer->resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, &buffer->transfer ); - buffer->end = buffer->start + buffer->resource->width0 / sizeof(struct vl_vertex_stream); + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + buffer->mv[i].vertex_stream = pipe_buffer_map + ( + pipe, + buffer->mv[i].resource, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &buffer->mv[i].transfer + ); + } + } static void -get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex4s mv[4]) +get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2]) { - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - mv[0].x = mv[1].x = mb->mv[0].top.x; - mv[0].y = mv[1].y = mb->mv[0].top.y; - mv[0].z = mv[1].z = 0; - - mv[2].x = mv[3].x = mb->mv[1].top.x; - mv[2].y = mv[3].y = mb->mv[1].top.y; - mv[2].z = mv[3].z = 0; + if (mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + dst[0].x = dst[1].x = src->top.x; + dst[0].y = dst[1].y = src->top.y; + dst[0].z = dst[1].z = 0; } else { - mv[0].x = mb->mv[0].top.x; - mv[0].y = mb->mv[0].top.y; - mv[0].z = mb->mv[0].top.field_select ? 3 : 1; - - mv[1].x = mb->mv[0].bottom.x; - mv[1].y = mb->mv[0].bottom.y; - mv[1].z = mb->mv[0].bottom.field_select ? 3 : 1; + dst[0].x = src->top.x; + dst[0].y = src->top.y; + dst[0].z = src->top.field_select ? 3 : 1; - mv[2].x = mb->mv[1].top.x; - mv[2].y = mb->mv[1].top.y; - mv[2].z = mb->mv[1].top.field_select ? 3 : 1; - - mv[3].x = mb->mv[1].bottom.x; - mv[3].y = mb->mv[1].bottom.y; - mv[3].z = mb->mv[1].bottom.field_select ? 3 : 1; + dst[1].x = src->bottom.x; + dst[1].y = src->bottom.y; + dst[1].z = src->bottom.field_select ? 3 : 1; } - mv[0].w = mv[1].w = mb->mv[0].wheight; - mv[2].w = mv[3].w = mb->mv[1].wheight; + dst[0].w = src->top.wheight; + dst[1].w = src->bottom.wheight; } void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb, const unsigned (*empty_block_mask)[3][2][2]) { - struct vl_vertex_stream *stream; unsigned i, j, k; + unsigned mv_pos; assert(buffer); assert(mb); - assert(buffer->num_not_empty + buffer->num_empty < buffer->size); + assert(buffer->num_instances < buffer->width * buffer->height); - if(mb->cbp) - stream = buffer->start + buffer->num_not_empty++; - else - stream = buffer->end - ++buffer->num_empty; + if(mb->cbp) { + struct vl_vertex_stream *stream; + stream = buffer->buffer + buffer->num_instances++; - stream->pos.x = mb->mbx; - stream->pos.y = mb->mby; + stream->pos.x = mb->mbx; + stream->pos.y = mb->mby; - for ( i = 0; i < 3; ++i) - for ( j = 0; j < 2; ++j) - for ( k = 0; k < 2; ++k) - stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]); + for ( i = 0; i < 3; ++i) + for ( j = 0; j < 2; ++j) + for ( k = 0; k < 2; ++k) + stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]); - stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; - stream->mb_type_intra = mb->dct_intra; + stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; + stream->mb_type_intra = mb->dct_intra; + } - get_motion_vectors(mb, stream->mv); + mv_pos = mb->mbx + mb->mby * buffer->width; + get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv); + get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv); } void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) { + unsigned i; + assert(buffer && pipe); pipe_buffer_unmap(pipe, buffer->transfer); + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + pipe_buffer_unmap(pipe, buffer->mv[i].transfer); + } } -void -vl_vb_restart(struct vl_vertex_buffer *buffer, - unsigned *not_empty_start_instance, unsigned *not_empty_num_instances, - unsigned *empty_start_instance, unsigned *empty_num_instances) +unsigned +vl_vb_restart(struct vl_vertex_buffer *buffer) { - assert(buffer); + unsigned num_instances; - *not_empty_start_instance = 0; - *not_empty_num_instances = buffer->num_not_empty; - *empty_start_instance = buffer->size - buffer->num_empty; - *empty_num_instances = buffer->num_empty; + assert(buffer); - buffer->num_not_empty = 0; - buffer->num_empty = 0; + num_instances = buffer->num_instances; + buffer->num_instances = 0; + return num_instances; } void vl_vb_cleanup(struct vl_vertex_buffer *buffer) { + unsigned i; + assert(buffer); pipe_resource_reference(&buffer->resource, NULL); + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + pipe_resource_reference(&buffer->mv[i].resource, NULL); + } } diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 62f7bf00508..bafaff7a311 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -30,6 +30,7 @@ #include #include +#include "vl_defines.h" #include "vl_types.h" /* vertex buffers act as a todo list @@ -54,25 +55,36 @@ enum VS_INPUT struct vl_vertex_buffer { - unsigned size; - unsigned num_not_empty; - unsigned num_empty; + unsigned width, height; + unsigned num_instances; + struct pipe_resource *resource; struct pipe_transfer *transfer; - struct vl_vertex_stream *start; - struct vl_vertex_stream *end; + struct vl_vertex_stream *buffer; + + struct { + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct vl_mv_vertex_stream *vertex_stream; + } mv[VL_MAX_REF_FRAMES]; }; struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y); +struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); + void *vl_vb_get_ves_eb(struct pipe_context *pipe, int component); void *vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector); -struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer, - struct pipe_context *pipe, - unsigned max_blocks); +void vl_vb_init(struct vl_vertex_buffer *buffer, + struct pipe_context *pipe, + unsigned width, unsigned height); + +struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer); + +struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector); void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); @@ -81,9 +93,7 @@ void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macrobl void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); -void vl_vb_restart(struct vl_vertex_buffer *buffer, - unsigned *not_empty_start_instance, unsigned *not_empty_num_instances, - unsigned *empty_start_instance, unsigned *empty_num_instances); +unsigned vl_vb_restart(struct vl_vertex_buffer *buffer); void vl_vb_cleanup(struct vl_vertex_buffer *buffer); diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h index f5c424cf296..960acd28060 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.h +++ b/src/gallium/auxiliary/vl/vl_video_buffer.h @@ -31,7 +31,7 @@ #include #include -#define VL_MAX_PLANES 3 +#include "vl_defines.h" /** * implementation of a planar ycbcr buffer diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 72a27938847..2cf5ea4d975 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -69,10 +69,13 @@ struct pipe_macroblock enum pipe_video_codec codec; }; -struct pipe_mpeg12_motionvector +struct pipe_motionvector { - signed x, y; - bool field_select; + struct { + signed x, y; + bool field_select; + unsigned wheight:8; + } top, bottom; }; struct pipe_mpeg12_macroblock @@ -84,10 +87,7 @@ struct pipe_mpeg12_macroblock enum pipe_mpeg12_motion_type mo_type; bool dct_intra; enum pipe_mpeg12_dct_type dct_type; - struct { - unsigned wheight:8; - struct pipe_mpeg12_motionvector top, bottom; - } mv[2]; + struct pipe_motionvector mv[2]; unsigned cbp; short *blocks; }; diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index f22d315c90d..9752497eb30 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -114,28 +114,28 @@ MacroBlocksToPipe(struct pipe_screen *screen, switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) { case XVMC_MB_TYPE_MOTION_FORWARD: - mb->mv[0].wheight = 255; - mb->mv[1].wheight = 0; + mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255; + mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0; break; case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD): - mb->mv[0].wheight = 127; - mb->mv[1].wheight = 127; + mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 127; + mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 127; break; case XVMC_MB_TYPE_MOTION_BACKWARD: - mb->mv[0].wheight = 0; - mb->mv[1].wheight = 255; + mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0; + mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 255; break; default: /* workaround for xines xxmc video out plugin */ if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) { - mb->mv[0].wheight = 255; - mb->mv[1].wheight = 0; + mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255; + mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0; } else { - mb->mv[0].wheight = 0; - mb->mv[1].wheight = 0; + mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0; + mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0; } break; } -- cgit v1.2.3 From 3ea7e2713c836f23d59c4034385609e371a94c8d Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 24 Apr 2011 19:20:33 +0200 Subject: [g3dvl] start supporting different render target formats Let's start with NV12, but anything else shouldn't be much of a problem any more. --- src/gallium/auxiliary/vl/vl_compositor.c | 2 +- src/gallium/auxiliary/vl/vl_context.c | 35 ++++++++-- src/gallium/auxiliary/vl/vl_mc.c | 95 ++++++++++++++------------ src/gallium/auxiliary/vl/vl_mc.h | 14 ++-- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 53 ++++++++------ src/gallium/auxiliary/vl/vl_video_buffer.c | 73 ++++++++++++++------ src/gallium/auxiliary/vl/vl_video_buffer.h | 3 +- src/gallium/include/pipe/p_video_context.h | 7 +- src/gallium/state_trackers/vdpau/surface.c | 2 +- src/gallium/state_trackers/xorg/xvmc/surface.c | 2 +- 10 files changed, 181 insertions(+), 105 deletions(-) (limited to 'src/gallium/auxiliary/vl/vl_mc.h') diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 25f7d5fa1da..e487abf915e 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -506,7 +506,7 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor, c->used_layers |= 1 << layer; c->layers[layer].fs = c->fs_video_buffer; - sampler_views = buffer->get_sampler_views(buffer); + sampler_views = buffer->get_sampler_view_components(buffer); for (i = 0; i < 3; ++i) pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]); diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c index 1240b0b4c32..be28bb507e6 100644 --- a/src/gallium/auxiliary/vl/vl_context.c +++ b/src/gallium/auxiliary/vl/vl_context.c @@ -35,6 +35,18 @@ #include "vl_compositor.h" #include "vl_mpeg12_decoder.h" +const enum pipe_format const_resource_formats_YV12[3] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8_UNORM +}; + +const enum pipe_format const_resource_formats_NV12[3] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_NONE +}; + static void vl_context_destroy(struct pipe_video_context *context) { @@ -202,19 +214,28 @@ vl_context_create_buffer(struct pipe_video_context *context, enum pipe_video_chroma_format chroma_format, unsigned width, unsigned height) { - const enum pipe_format resource_formats[3] = { - PIPE_FORMAT_R8_UNORM, - PIPE_FORMAT_R8_UNORM, - PIPE_FORMAT_R8_UNORM - }; - struct vl_context *ctx = (struct vl_context*)context; struct pipe_video_buffer *result; unsigned buffer_width, buffer_height; + const enum pipe_format *resource_formats; + assert(context); assert(width > 0 && height > 0); - assert(buffer_format == PIPE_FORMAT_YV12); + + switch(buffer_format) { + case PIPE_FORMAT_YV12: + resource_formats = const_resource_formats_YV12; + break; + + case PIPE_FORMAT_NV12: + resource_formats = const_resource_formats_NV12; + break; + + default: + assert(0); + return NULL; + } buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : width; buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : height; diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index 137a1beaa0d..ecdce6b28bd 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -368,6 +368,7 @@ init_pipe_state(struct vl_mc *r) struct pipe_sampler_state sampler; struct pipe_blend_state blend; struct pipe_rasterizer_state rs_state; + unsigned i; assert(r); @@ -391,28 +392,30 @@ init_pipe_state(struct vl_mc *r) if (!r->sampler_ycbcr) goto error_sampler_ycbcr; - memset(&blend, 0, sizeof blend); - blend.independent_blend_enable = 0; - blend.rt[0].blend_enable = 1; - blend.rt[0].rgb_func = PIPE_BLEND_ADD; - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_func = PIPE_BLEND_ADD; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.logicop_enable = 0; - blend.logicop_func = PIPE_LOGICOP_CLEAR; - blend.rt[0].colormask = PIPE_MASK_RGBA; - blend.dither = 0; - r->blend_clear = r->pipe->create_blend_state(r->pipe, &blend); - if (!r->blend_clear) - goto error_blend_clear; - - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; - r->blend_add = r->pipe->create_blend_state(r->pipe, &blend); - if (!r->blend_add) - goto error_blend_add; + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + memset(&blend, 0, sizeof blend); + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + blend.rt[0].colormask = i; + blend.dither = 0; + r->blend_clear[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_clear[i]) + goto error_blend; + + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_add[i]) + goto error_blend; + } memset(&rs_state, 0, sizeof(rs_state)); /*rs_state.sprite_coord_enable */ @@ -427,12 +430,15 @@ init_pipe_state(struct vl_mc *r) return true; error_rs_state: - r->pipe->delete_blend_state(r->pipe, r->blend_add); +error_blend: + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + if (r->blend_add[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); -error_blend_add: - r->pipe->delete_blend_state(r->pipe, r->blend_clear); + if (r->blend_clear[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); + } -error_blend_clear: r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); error_sampler_ref: @@ -445,12 +451,16 @@ error_sampler_ycbcr: static void cleanup_pipe_state(struct vl_mc *r) { + unsigned i; + assert(r); r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr); - r->pipe->delete_blend_state(r->pipe, r->blend_clear); - r->pipe->delete_blend_state(r->pipe, r->blend_add); + for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); + r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); + } r->pipe->delete_rasterizer_state(r->pipe, r->rs_state); } @@ -520,11 +530,9 @@ vl_mc_cleanup(struct vl_mc *renderer) } bool -vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer, - struct pipe_sampler_view *source) +vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer) { assert(renderer && buffer); - assert(source); buffer->renderer = renderer; @@ -538,8 +546,6 @@ vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer, buffer->fb_state.nr_cbufs = 1; buffer->fb_state.zsbuf = NULL; - pipe_sampler_view_reference(&buffer->source, source); - return true; } @@ -547,8 +553,6 @@ void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer) { assert(buffer); - - pipe_sampler_view_reference(&buffer->source, NULL); } void @@ -567,7 +571,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface) } static void -prepare_pipe_4_rendering(struct vl_mc_buffer *buffer) +prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask) { struct vl_mc *renderer; @@ -577,11 +581,9 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer) renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); if (buffer->surface_cleared) - renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add); - else { - renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear); - buffer->surface_cleared = true; - } + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]); + else + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]); renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state); renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport); @@ -594,7 +596,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) assert(buffer && ref); - prepare_pipe_4_rendering(buffer); + prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); renderer = buffer->renderer; @@ -607,10 +609,13 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, renderer->buffer_width / MACROBLOCK_WIDTH * renderer->buffer_height / MACROBLOCK_HEIGHT); + + buffer->surface_cleared = true; } void -vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances) +vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, + unsigned component, unsigned num_instances) { struct vl_mc *renderer; @@ -619,14 +624,14 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances) if (num_instances == 0) return; - prepare_pipe_4_rendering(buffer); + prepare_pipe_4_rendering(buffer, 1 << component); renderer = buffer->renderer; renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr); renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr); - renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source); + renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &source); renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr); util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h index bc2b0e7f149..353afa9df62 100644 --- a/src/gallium/auxiliary/vl/vl_mc.h +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -31,8 +31,11 @@ #include #include +#include "vl_defines.h" #include "vl_types.h" +#define VL_MC_NUM_BLENDERS (1 << VL_MAX_PLANES) + struct pipe_context; struct vl_mc @@ -44,7 +47,8 @@ struct vl_mc void *rs_state; - void *blend_clear, *blend_add; + void *blend_clear[VL_MC_NUM_BLENDERS]; + void *blend_add[VL_MC_NUM_BLENDERS]; void *vs_ref, *vs_ycbcr; void *fs_ref, *fs_ycbcr; void *sampler_ref, *sampler_ycbcr; @@ -58,8 +62,6 @@ struct vl_mc_buffer struct pipe_viewport_state viewport; struct pipe_framebuffer_state fb_state; - - struct pipe_sampler_view *source; }; bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, @@ -68,8 +70,7 @@ bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, void vl_mc_cleanup(struct vl_mc *renderer); -bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer, - struct pipe_sampler_view *source); +bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer); void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer); @@ -77,6 +78,7 @@ void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref); -void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances); +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, + unsigned component, unsigned num_instances); #endif /* vl_mc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 5027db4314e..296f46aba52 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -97,7 +97,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer) if (!buffer->zscan_source) goto error_source; - source = buffer->zscan_source->get_sampler_views(buffer->zscan_source); + source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source); if (!source) goto error_sampler; @@ -174,11 +174,11 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) if (!buffer->idct_intermediate) goto error_intermediate; - idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source); + idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source); if (!idct_source_sv) goto error_source_sv; - idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate); + idct_intermediate_sv = buffer->idct_intermediate->get_sampler_view_planes(buffer->idct_intermediate); if (!idct_intermediate_sv) goto error_intermediate_sv; @@ -231,7 +231,6 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf) { struct vl_mpeg12_decoder *dec; enum pipe_format formats[3]; - struct pipe_sampler_view **mc_source_sv; assert(buf); @@ -247,17 +246,13 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf) if (!buf->mc_source) goto error_mc_source; - mc_source_sv = buf->mc_source->get_sampler_views(buf->mc_source); - if (!mc_source_sv) - goto error_mc_source_sv; - - if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0], mc_source_sv[0])) + if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) goto error_mc_y; - if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1], mc_source_sv[1])) + if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1])) goto error_mc_cb; - if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2], mc_source_sv[2])) + if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2])) goto error_mc_cr; return true; @@ -269,7 +264,6 @@ error_mc_cb: vl_mc_cleanup_buffer(&buf->mc[0]); error_mc_y: -error_mc_source_sv: buf->mc_source->destroy(buf->mc_source); error_mc_source: @@ -328,7 +322,7 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer) vl_vb_map(&buf->vertex_stream, dec->pipe); - sampler_views = buf->zscan_source->get_sampler_views(buf->zscan_source); + sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source); assert(sampler_views); @@ -510,12 +504,13 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer; struct vl_mpeg12_decoder *dec; - struct pipe_sampler_view **sv[2]; + struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv; struct pipe_surface **surfaces; struct pipe_vertex_buffer vb[3]; - unsigned i, j; + unsigned i, j, component; + unsigned nr_components; assert(buf); @@ -523,19 +518,21 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, assert(dec); for (i = 0; i < 2; ++i) - sv[i] = refs[i] ? refs[i]->get_sampler_views(refs[i]) : NULL; - - surfaces = dst->get_surfaces(dst); + sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL; vb[0] = dec->quads; vb[1] = dec->pos; + surfaces = dst->get_surfaces(dst); + dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv); for (i = 0; i < VL_MAX_PLANES; ++i) { + if (!surfaces[i]) continue; + vl_mc_set_surface(&buf->mc[i], surfaces[i]); - for (j = 0; j < 2; ++j) { - if (sv[j] == NULL) continue; + for (j = 0; j < VL_MAX_REF_FRAMES; ++j) { + if (!sv[j]) continue; vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);; dec->pipe->set_vertex_buffers(dec->pipe, 3, vb); @@ -546,7 +543,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr); for (i = 0; i < VL_MAX_PLANES; ++i) { - if (num_ycbcr_blocks[i] == 0) continue; + if (!num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); @@ -555,8 +552,20 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]); + } + + mc_source_sv = buf->mc_source->get_sampler_view_planes(buf->mc_source); + for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) { + if (!surfaces[i]) continue; - vl_mc_render_ycbcr(&buf->mc[i], num_ycbcr_blocks[i]); + nr_components = util_format_get_nr_components(surfaces[i]->texture->format); + for (j = 0; j < nr_components; ++j, ++component) { + if (!num_ycbcr_blocks[i]) continue; + + vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); + dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); + vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]); + } } dec->pipe->flush(dec->pipe, fence); diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c index dad8dd2c9ae..dccd7e93945 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.c +++ b/src/gallium/auxiliary/vl/vl_video_buffer.c @@ -38,17 +38,6 @@ #include "vl_video_buffer.h" -static inline void -adjust_swizzle(struct pipe_sampler_view *sv_templ) -{ - if (util_format_get_nr_components(sv_templ->format) == 1) { - sv_templ->swizzle_r = PIPE_SWIZZLE_RED; - sv_templ->swizzle_g = PIPE_SWIZZLE_RED; - sv_templ->swizzle_b = PIPE_SWIZZLE_RED; - sv_templ->swizzle_a = PIPE_SWIZZLE_RED; - } -} - static void vl_video_buffer_destroy(struct pipe_video_buffer *buffer) { @@ -59,13 +48,14 @@ vl_video_buffer_destroy(struct pipe_video_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) { pipe_surface_reference(&buf->surfaces[i], NULL); - pipe_sampler_view_reference(&buf->sampler_views[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); pipe_resource_reference(&buf->resources[i], NULL); } } static struct pipe_sampler_view ** -vl_video_buffer_sampler_views(struct pipe_video_buffer *buffer) +vl_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer) { struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; struct pipe_sampler_view sv_templ; @@ -77,21 +67,63 @@ vl_video_buffer_sampler_views(struct pipe_video_buffer *buffer) pipe = buf->pipe; for (i = 0; i < buf->num_planes; ++i ) { - if (!buf->sampler_views[i]) { + if (!buf->sampler_view_planes[i]) { memset(&sv_templ, 0, sizeof(sv_templ)); u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format); - adjust_swizzle(&sv_templ); - buf->sampler_views[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ); - if (!buf->sampler_views[i]) + + if (util_format_get_nr_components(buf->resources[i]->format) == 1) + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = sv_templ.swizzle_a = PIPE_SWIZZLE_RED; + + buf->sampler_view_planes[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ); + if (!buf->sampler_view_planes[i]) goto error; } } - return buf->sampler_views; + return buf->sampler_view_planes; error: for (i = 0; i < buf->num_planes; ++i ) - pipe_sampler_view_reference(&buf->sampler_views[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + + return NULL; +} + +static struct pipe_sampler_view ** +vl_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; + struct pipe_sampler_view sv_templ; + struct pipe_context *pipe; + unsigned i, j, component; + + assert(buf); + + pipe = buf->pipe; + + for (component = 0, i = 0; i < buf->num_planes; ++i ) { + unsigned nr_components = util_format_get_nr_components(buf->resources[i]->format); + + for (j = 0; j < nr_components; ++j, ++component) { + assert(component < VL_MAX_PLANES); + + if (!buf->sampler_view_components[component]) { + memset(&sv_templ, 0, sizeof(sv_templ)); + u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format); + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_RED + j; + sv_templ.swizzle_a = PIPE_SWIZZLE_ONE; + buf->sampler_view_components[component] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ); + if (!buf->sampler_view_components[component]) + goto error; + } + } + } + + return buf->sampler_view_components; + +error: + for (i = 0; i < VL_MAX_PLANES; ++i ) + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); return NULL; } @@ -145,7 +177,8 @@ vl_video_buffer_init(struct pipe_video_context *context, buffer = CALLOC_STRUCT(vl_video_buffer); buffer->base.destroy = vl_video_buffer_destroy; - buffer->base.get_sampler_views = vl_video_buffer_sampler_views; + buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes; + buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components; buffer->base.get_surfaces = vl_video_buffer_surfaces; buffer->pipe = pipe; buffer->num_planes = 1; diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h index 960acd28060..2dca74f641e 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.h +++ b/src/gallium/auxiliary/vl/vl_video_buffer.h @@ -44,7 +44,8 @@ struct vl_video_buffer struct pipe_context *pipe; unsigned num_planes; struct pipe_resource *resources[VL_MAX_PLANES]; - struct pipe_sampler_view *sampler_views[VL_MAX_PLANES]; + struct pipe_sampler_view *sampler_view_planes[VL_MAX_PLANES]; + struct pipe_sampler_view *sampler_view_components[VL_MAX_PLANES]; struct pipe_surface *surfaces[VL_MAX_PLANES]; }; diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h index 7e971641618..88d3ca1f4e4 100644 --- a/src/gallium/include/pipe/p_video_context.h +++ b/src/gallium/include/pipe/p_video_context.h @@ -235,7 +235,12 @@ struct pipe_video_buffer /** * get a individual sampler view for each plane */ - struct pipe_sampler_view **(*get_sampler_views)(struct pipe_video_buffer *buffer); + struct pipe_sampler_view **(*get_sampler_view_planes)(struct pipe_video_buffer *buffer); + + /** + * get a individual sampler view for each component + */ + struct pipe_sampler_view **(*get_sampler_view_components)(struct pipe_video_buffer *buffer); /** * get a individual surfaces for each plane diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c index cd2125fce63..c30cd07f434 100644 --- a/src/gallium/state_trackers/vdpau/surface.c +++ b/src/gallium/state_trackers/vdpau/surface.c @@ -176,7 +176,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface, return VDP_STATUS_NO_IMPLEMENTATION; } - sampler_views = p_surf->video_buffer->get_sampler_views(p_surf->video_buffer); + sampler_views = p_surf->video_buffer->get_sampler_view_planes(p_surf->video_buffer); if (!sampler_views) return VDP_STATUS_RESOURCES; diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index 7429fdfcfb9..cfa15e120d9 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -306,7 +306,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder); surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer); - surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO + surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format, context_priv->decoder->width, context_priv->decoder->height); -- cgit v1.2.3 From 7f04fe5338d0846ec9a6003033da5357d2785c8b Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 25 Apr 2011 01:20:15 +0200 Subject: [g3dvl] merge idct stage 2 and mc ycbcr stage into a single draw --- src/gallium/auxiliary/vl/vl_idct.c | 312 +++++++++++++-------------- src/gallium/auxiliary/vl/vl_idct.h | 52 +++-- src/gallium/auxiliary/vl/vl_mc.c | 67 +++--- src/gallium/auxiliary/vl/vl_mc.h | 22 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 145 +++++++++---- src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 4 +- 6 files changed, 344 insertions(+), 258 deletions(-) (limited to 'src/gallium/auxiliary/vl/vl_mc.h') diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index ca3b1cb53ab..ebb4ad4fe0c 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -86,8 +86,54 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc); } +static void +increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], + struct ureg_src saddr[2], bool right_side, bool transposed, + int pos, float size) +{ + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; + + /* + * daddr[0..1].(start) = saddr[0..1].(start) + * daddr[0..1].(tc) = saddr[0..1].(tc) + */ + + ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); + ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); + ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); + ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); +} + +static void +fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) +{ + ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); + ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); +} + +static void +matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) +{ + struct ureg_dst tmp; + + tmp = ureg_DECL_temporary(shader); + + /* + * tmp.xy = dot4(m[0][0..1], m[1][0..1]) + * dst = tmp.x + tmp.y + */ + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); + ureg_ADD(shader, dst, + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); + + ureg_release_temporary(shader, tmp); +} + static void * -create_vert_shader(struct vl_idct *idct, bool matrix_stage) +create_stage1_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src vrect, vpos; @@ -99,12 +145,12 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) if (!shader) return NULL; - t_tex = ureg_DECL_temporary(shader); - t_start = ureg_DECL_temporary(shader); - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + t_tex = ureg_DECL_temporary(shader); + t_start = ureg_DECL_temporary(shader); + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); @@ -131,22 +177,17 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), - ureg_scalar(vrect, TGSI_SWIZZLE_X), - ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); - if(matrix_stage) { - calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); - calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); - } else { - calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); - calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); - } + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_start); @@ -156,54 +197,8 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void -increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], - struct ureg_src saddr[2], bool right_side, bool transposed, - int pos, float size) -{ - unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; - unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; - - /* - * daddr[0..1].(start) = saddr[0..1].(start) - * daddr[0..1].(tc) = saddr[0..1].(tc) - */ - - ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); - ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); - ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); - ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); -} - -static void -fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) -{ - ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); - ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); -} - -static void -matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) -{ - struct ureg_dst tmp; - - tmp = ureg_DECL_temporary(shader); - - /* - * tmp.xy = dot4(m[0][0..1], m[1][0..1]) - * dst = tmp.x + tmp.y - */ - ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); - ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); - ureg_ADD(shader, dst, - ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); - - ureg_release_temporary(shader, tmp); -} - static void * -create_matrix_frag_shader(struct vl_idct *idct) +create_stage1_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; @@ -272,25 +267,56 @@ create_matrix_frag_shader(struct vl_idct *idct) return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void * -create_transpose_frag_shader(struct vl_idct *idct) +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex) { - struct ureg_program *shader; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_start; + struct ureg_dst o_l_addr[2], o_r_addr[2]; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + t_start = ureg_DECL_temporary(shader); + + --first_output; + + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); + + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / idct->buffer_width, + (float)BLOCK_HEIGHT / idct->buffer_height); + + ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); +} + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment) +{ struct ureg_src l_addr[2], r_addr[2]; struct ureg_dst l[2], r[2]; - struct ureg_dst fragment; - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); - if (!shader) - return NULL; + --first_input; - l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); - l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); - r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); - r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); l[0] = ureg_DECL_temporary(shader); l[1] = ureg_DECL_temporary(shader); @@ -300,61 +326,39 @@ create_transpose_frag_shader(struct vl_idct *idct) fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - - matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r); + matrix_mul(shader, fragment, l, r); ureg_release_temporary(shader, l[0]); ureg_release_temporary(shader, l[1]); ureg_release_temporary(shader, r[0]); ureg_release_temporary(shader, r[1]); - - ureg_END(shader); - - return ureg_create_shader_and_destroy(shader, idct->pipe); } static bool init_shaders(struct vl_idct *idct) { - idct->matrix_vs = create_vert_shader(idct, true); - if (!idct->matrix_vs) - goto error_matrix_vs; - - idct->matrix_fs = create_matrix_frag_shader(idct); - if (!idct->matrix_fs) - goto error_matrix_fs; + idct->vs = create_stage1_vert_shader(idct); + if (!idct->vs) + goto error_vs; - idct->transpose_vs = create_vert_shader(idct, false); - if (!idct->transpose_vs) - goto error_transpose_vs; - - idct->transpose_fs = create_transpose_frag_shader(idct); - if (!idct->transpose_fs) - goto error_transpose_fs; + idct->fs = create_stage1_frag_shader(idct); + if (!idct->fs) + goto error_fs; return true; -error_transpose_fs: - idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); - -error_transpose_vs: - idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); - -error_matrix_fs: - idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); +error_fs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs); -error_matrix_vs: +error_vs: return false; } static void cleanup_shaders(struct vl_idct *idct) { - idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); - idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); - idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); - idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); + idct->pipe->delete_vs_state(idct->pipe, idct->vs); + idct->pipe->delete_fs_state(idct->pipe, idct->fs); } static bool @@ -447,30 +451,30 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) tex = buffer->sampler_views.individual.intermediate->texture; - buffer->fb_state[0].width = tex->width0; - buffer->fb_state[0].height = tex->height0; - buffer->fb_state[0].nr_cbufs = idct->nr_of_render_targets; + buffer->fb_state.width = tex->width0; + buffer->fb_state.height = tex->height0; + buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; for(i = 0; i < idct->nr_of_render_targets; ++i) { memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; surf_templ.u.tex.first_layer = i; surf_templ.u.tex.last_layer = i; surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; - buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( + buffer->fb_state.cbufs[i] = idct->pipe->create_surface( idct->pipe, tex, &surf_templ); - if (!buffer->fb_state[0].cbufs[i]) + if (!buffer->fb_state.cbufs[i]) goto error_surfaces; } - buffer->viewport[0].scale[0] = tex->width0; - buffer->viewport[0].scale[1] = tex->height0; + buffer->viewport.scale[0] = tex->width0; + buffer->viewport.scale[1] = tex->height0; return true; error_surfaces: for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); return false; } @@ -483,7 +487,7 @@ cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); } @@ -607,13 +611,13 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_sampler_view *intermediate, struct pipe_surface *destination) { - unsigned i; - assert(buffer); assert(idct); assert(source); assert(destination); + memset(buffer, 0, sizeof(struct vl_idct_buffer)); + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); @@ -622,25 +626,12 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, if (!init_intermediate(idct, buffer)) return false; - /* init state */ - buffer->fb_state[1].width = destination->texture->width0; - buffer->fb_state[1].height = destination->texture->height0; - buffer->fb_state[1].nr_cbufs = 1; - pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination); - - buffer->viewport[1].scale[0] = destination->texture->width0; - buffer->viewport[1].scale[1] = destination->texture->height0; - - for(i = 0; i < 2; ++i) { - buffer->viewport[i].scale[2] = 1; - buffer->viewport[i].scale[3] = 1; - buffer->viewport[i].translate[0] = 0; - buffer->viewport[i].translate[1] = 0; - buffer->viewport[i].translate[2] = 0; - buffer->viewport[i].translate[3] = 0; - - buffer->fb_state[i].zsbuf = NULL; - } + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + buffer->viewport.translate[0] = 0; + buffer->viewport.translate[1] = 0; + buffer->viewport.translate[2] = 0; + buffer->viewport.translate[3] = 0; return true; } @@ -653,9 +644,7 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); - - pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); cleanup_intermediate(idct, buffer); } @@ -666,25 +655,28 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_ assert(idct); assert(buffer); - if(num_instances > 0) { - idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); - idct->pipe->bind_blend_state(idct->pipe, idct->blend); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); - - /* first stage */ - idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); - idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); - idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); - idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); - - /* second stage */ - idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); - idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); - idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); - idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); - } + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_blend_state(idct->pipe, idct->blend); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); + + /* first stage */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); + idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport); + idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->fs); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} + +void +vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + assert(idct); + assert(buffer); + + /* second stage */ + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); + idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); } + diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h index 96933b9d889..f5a1e5d9b73 100644 --- a/src/gallium/auxiliary/vl/vl_idct.h +++ b/src/gallium/auxiliary/vl/vl_idct.h @@ -30,6 +30,8 @@ #include +#include + /* shader based inverse distinct cosinus transformation * expect usage of vl_vertex_buffers as a todo list */ @@ -46,8 +48,7 @@ struct vl_idct void *samplers[2]; - void *matrix_vs, *transpose_vs; - void *matrix_fs, *transpose_fs; + void *vs, *fs; struct pipe_sampler_view *matrix; struct pipe_sampler_view *transpose; @@ -56,8 +57,8 @@ struct vl_idct /* a set of buffers to work with */ struct vl_idct_buffer { - struct pipe_viewport_state viewport[2]; - struct pipe_framebuffer_state fb_state[2]; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; union { @@ -71,28 +72,45 @@ struct vl_idct_buffer }; /* upload the idct matrix, which can be shared by all idct instances of a pipe */ -struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float scale); +struct pipe_sampler_view * +vl_idct_upload_matrix(struct pipe_context *pipe, float scale); + +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex); + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment); /* init an idct instance */ -bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, - unsigned buffer_width, unsigned buffer_height, - unsigned nr_of_render_targets, - struct pipe_sampler_view *matrix, - struct pipe_sampler_view *transpose); +bool +vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned nr_of_render_targets, + struct pipe_sampler_view *matrix, + struct pipe_sampler_view *transpose); /* destroy an idct instance */ -void vl_idct_cleanup(struct vl_idct *idct); +void +vl_idct_cleanup(struct vl_idct *idct); /* init a buffer assosiated with agiven idct instance */ -bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, - struct pipe_sampler_view *source, - struct pipe_sampler_view *intermediate, - struct pipe_surface *destination); +bool +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_sampler_view *source, + struct pipe_sampler_view *intermediate, + struct pipe_surface *destination); /* cleanup a buffer of an idct instance */ -void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer); +void +vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer); /* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */ -void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts); +void +vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts); + +void +vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer); #endif diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index ecdce6b28bd..7474c58250d 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -37,12 +37,16 @@ #include "vl_defines.h" #include "vl_vertex_buffers.h" #include "vl_mc.h" +#include "vl_idct.h" enum VS_OUTPUT { VS_O_VPOS, VS_O_VTOP, - VS_O_VBOTTOM + VS_O_VBOTTOM, + + VS_O_FLAGS = VS_O_VTOP, + VS_O_VTEX = VS_O_VBOTTOM }; static struct ureg_dst @@ -220,13 +224,13 @@ create_ref_frag_shader(struct vl_mc *r) } static void * -create_ycbcr_vert_shader(struct vl_mc *r) +create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src vrect, vpos; struct ureg_dst t_vpos, t_vtex; - struct ureg_dst o_vpos, o_vtex; + struct ureg_dst o_vpos, o_flags; struct vertex2f scale = { (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size, @@ -246,11 +250,11 @@ create_ycbcr_vert_shader(struct vl_mc *r) t_vtex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); + o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS); /* * o_vtex.xy = t_vpos - * o_vtex.z = intra * 0.5 + * o_flags.z = intra * 0.5 * * if(interlaced) { * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } @@ -258,14 +262,16 @@ create_ycbcr_vert_shader(struct vl_mc *r) * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y * o_vpos.y = t_vtex.y + t_vpos.y * - * o_vtex.w = t_vtex.z ? 0 : 1 + * o_flags.w = t_vtex.z ? 0 : 1 * } * */ - ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z), + + vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos); + + ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); - ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); + ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); @@ -286,7 +292,7 @@ create_ycbcr_vert_shader(struct vl_mc *r) ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), ureg_src(t_vpos), ureg_src(t_vtex)); - ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), + ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); @@ -303,10 +309,10 @@ create_ycbcr_vert_shader(struct vl_mc *r) } static void * -create_ycbcr_frag_shader(struct vl_mc *r, float scale) +create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) { struct ureg_program *shader; - struct ureg_src tc, sampler; + struct ureg_src flags; struct ureg_dst tmp; struct ureg_dst fragment; unsigned label; @@ -315,9 +321,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) if (!shader) return NULL; - tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); - - sampler = ureg_DECL_sampler(shader, 0); + flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); @@ -333,7 +337,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) */ ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), - ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp)); + ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp)); ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label); @@ -342,15 +346,15 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ELSE(shader, &label); - ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler); + fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp); if (scale != 1.0f) ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, scale), - ureg_scalar(tc, TGSI_SWIZZLE_Z)); + ureg_scalar(flags, TGSI_SWIZZLE_Z)); else ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), - ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z)); + ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); @@ -386,12 +390,6 @@ init_pipe_state(struct vl_mc *r) if (!r->sampler_ref) goto error_sampler_ref; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler); - if (!r->sampler_ycbcr) - goto error_sampler_ycbcr; - for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { memset(&blend, 0, sizeof blend); blend.independent_blend_enable = 0; @@ -442,9 +440,6 @@ error_blend: r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); error_sampler_ref: - r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr); - -error_sampler_ycbcr: return false; } @@ -456,7 +451,6 @@ cleanup_pipe_state(struct vl_mc *r) assert(r); r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); - r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr); for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); @@ -467,7 +461,10 @@ cleanup_pipe_state(struct vl_mc *r) bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, - unsigned macroblock_size, float scale) + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv) { assert(renderer); assert(pipe); @@ -486,7 +483,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, if (!renderer->vs_ref) goto error_vs_ref; - renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer); + renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv); if (!renderer->vs_ycbcr) goto error_vs_ycbcr; @@ -494,7 +491,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, if (!renderer->fs_ref) goto error_fs_ref; - renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale); + renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, fs_callback, callback_priv); if (!renderer->fs_ycbcr) goto error_fs_ycbcr; @@ -614,8 +611,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) } void -vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, - unsigned component, unsigned num_instances) +vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances) { struct vl_mc *renderer; @@ -631,8 +627,5 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr); renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr); - renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &source); - renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr); - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); } diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h index 353afa9df62..85ec69b3ce7 100644 --- a/src/gallium/auxiliary/vl/vl_mc.h +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -31,6 +31,8 @@ #include #include +#include + #include "vl_defines.h" #include "vl_types.h" @@ -51,7 +53,7 @@ struct vl_mc void *blend_add[VL_MC_NUM_BLENDERS]; void *vs_ref, *vs_ycbcr; void *fs_ref, *fs_ycbcr; - void *sampler_ref, *sampler_ycbcr; + void *sampler_ref; }; struct vl_mc_buffer @@ -64,9 +66,22 @@ struct vl_mc_buffer struct pipe_framebuffer_state fb_state; }; +typedef void (*vl_mc_ycbcr_vert_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex); + +typedef void (*vl_mc_ycbcr_frag_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst); + bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, unsigned picture_width, unsigned picture_height, - unsigned macroblock_size, float scale); + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv); void vl_mc_cleanup(struct vl_mc *renderer); @@ -78,7 +93,6 @@ void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref); -void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, - unsigned component, unsigned num_instances); +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances); #endif /* vl_mc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 37789707a6b..74ec4b1db7b 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -145,7 +145,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) { enum pipe_format formats[3]; - struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv; + struct pipe_sampler_view **idct_source_sv, **mc_source_sv; struct pipe_surface **idct_surfaces; struct vl_mpeg12_decoder *dec; @@ -164,23 +164,23 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) if (!buffer->idct_source) goto error_source; - formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format; - buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width / dec->nr_of_idct_render_targets, - dec->base.height / 4, dec->nr_of_idct_render_targets, - dec->base.chroma_format, - formats, PIPE_USAGE_STATIC); + formats[0] = formats[1] = formats[2] = dec->mc_source_format; + buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width / dec->nr_of_idct_render_targets, + dec->base.height / 4, dec->nr_of_idct_render_targets, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); - if (!buffer->idct_intermediate) - goto error_intermediate; + if (!buffer->mc_source) + goto error_mc_source; idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source); if (!idct_source_sv) goto error_source_sv; - idct_intermediate_sv = buffer->idct_intermediate->get_sampler_view_planes(buffer->idct_intermediate); - if (!idct_intermediate_sv) - goto error_intermediate_sv; + mc_source_sv = buffer->mc_source->get_sampler_view_planes(buffer->mc_source); + if (!mc_source_sv) + goto error_mc_source_sv; idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source); if (!idct_surfaces) @@ -189,7 +189,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < 3; ++i) if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c, &buffer->idct[i], idct_source_sv[i], - idct_intermediate_sv[i], idct_surfaces[i])) + mc_source_sv[i], idct_surfaces[i])) goto error_plane; return true; @@ -199,11 +199,11 @@ error_plane: vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]); error_surfaces: -error_intermediate_sv: +error_mc_source_sv: error_source_sv: - buffer->idct_intermediate->destroy(buffer->idct_intermediate); + buffer->mc_source->destroy(buffer->mc_source); -error_intermediate: +error_mc_source: buffer->idct_source->destroy(buffer->idct_source); error_source: @@ -223,7 +223,6 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]); vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]); buf->idct_source->destroy(buf->idct_source); - buf->idct_intermediate->destroy(buf->idct_intermediate); } static bool @@ -237,14 +236,16 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf) dec = (struct vl_mpeg12_decoder*)buf->base.decoder; assert(dec); - formats[0] = formats[1] = formats[2] =dec->mc_source_format; - buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width, dec->base.height, 1, - dec->base.chroma_format, - formats, PIPE_USAGE_STATIC); + if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) { + formats[0] = formats[1] = formats[2] =dec->mc_source_format; + buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width, dec->base.height, 1, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); - if (!buf->mc_source) - goto error_mc_source; + if (!buf->mc_source) + goto error_mc_source; + } if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) goto error_mc_y; @@ -420,6 +421,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) dec->pipe->bind_fs_state(dec->pipe, NULL); dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa); + dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr); vl_mc_cleanup(&dec->mc_y); vl_mc_cleanup(&dec->mc_c); @@ -563,7 +565,14 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); - vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]); + else { + dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]); + dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr); + } + vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]); } } } @@ -572,6 +581,7 @@ static bool init_pipe_state(struct vl_mpeg12_decoder *dec) { struct pipe_depth_stencil_alpha_state dsa; + struct pipe_sampler_state sampler; unsigned i; assert(dec); @@ -595,6 +605,20 @@ init_pipe_state(struct vl_mpeg12_decoder *dec) dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa); dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa); + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler); + if (!dec->sampler_ycbcr) + return false; + return true; } @@ -675,10 +699,10 @@ init_idct(struct vl_mpeg12_decoder *dec) if (dec->idct_source_format == PIPE_FORMAT_NONE) return false; - dec->idct_intermediate_format = find_first_supported_format(dec, const_idct_intermediate_formats, - num_idct_intermediate_formats, PIPE_TEXTURE_3D); + dec->mc_source_format = find_first_supported_format(dec, const_idct_intermediate_formats, + num_idct_intermediate_formats, PIPE_TEXTURE_3D); - if (dec->idct_intermediate_format == PIPE_FORMAT_NONE) + if (dec->mc_source_format == PIPE_FORMAT_NONE) return false; switch (dec->idct_source_format) { @@ -695,8 +719,8 @@ init_idct(struct vl_mpeg12_decoder *dec) return false; } - if (dec->idct_intermediate_format == PIPE_FORMAT_R16G16B16A16_FLOAT || - dec->idct_intermediate_format == PIPE_FORMAT_R32G32B32A32_FLOAT) + if (dec->mc_source_format == PIPE_FORMAT_R16G16B16A16_FLOAT || + dec->mc_source_format == PIPE_FORMAT_R32G32B32A32_FLOAT) transpose_scale = 1.0f; else transpose_scale = matrix_scale = sqrt(matrix_scale); @@ -738,6 +762,49 @@ error_matrix: return false; } +static void +mc_vert_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_dst o_vtex; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_vert_shader(idct, shader, first_output, tex); + } else { + o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output); + ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex)); + } +} + +static void +mc_frag_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_src src, sampler; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_frag_shader(idct, shader, first_input, dst); + } else { + src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR); + sampler = ureg_DECL_sampler(shader, 0); + ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler); + } +} + struct pipe_video_decoder * vl_create_mpeg12_decoder(struct pipe_video_context *context, struct pipe_context *pipe, @@ -785,12 +852,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats, - num_mc_source_formats, PIPE_TEXTURE_3D); - - if (dec->mc_source_format == PIPE_FORMAT_NONE) - return NULL; - if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { dec->chroma_width = dec->base.width / 2; dec->chroma_height = dec->base.height / 2; @@ -813,6 +874,12 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, else mc_scale = 1.0f; } else { + dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats, + num_mc_source_formats, PIPE_TEXTURE_3D); + + if (dec->mc_source_format == PIPE_FORMAT_NONE) + return NULL; + switch (dec->mc_source_format) { case PIPE_FORMAT_R16_SNORM: mc_scale = SCALE_FACTOR_SNORM; @@ -828,11 +895,13 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, } } - if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale)) + if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) goto error_mc_y; // TODO - if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale)) + if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) goto error_mc_c; if (!init_pipe_state(dec)) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index 9d5768816fb..e483ace03b4 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -53,7 +53,6 @@ struct vl_mpeg12_decoder enum pipe_format zscan_source_format; enum pipe_format idct_source_format; - enum pipe_format idct_intermediate_format; enum pipe_format mc_source_format; struct pipe_vertex_buffer quads; @@ -62,6 +61,8 @@ struct vl_mpeg12_decoder void *ves_ycbcr; void *ves_mv; + void *sampler_ycbcr; + struct vl_zscan zscan_y, zscan_c; struct vl_idct idct_y, idct_c; struct vl_mc mc_y, mc_c; @@ -77,7 +78,6 @@ struct vl_mpeg12_buffer struct pipe_video_buffer *zscan_source; struct pipe_video_buffer *idct_source; - struct pipe_video_buffer *idct_intermediate; struct pipe_video_buffer *mc_source; struct vl_zscan_buffer zscan[VL_MAX_PLANES]; -- cgit v1.2.3 From c5110a1bfab8e97b8b958d42dd294a426310d1e2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 13 Jul 2011 16:07:30 +0200 Subject: [g3dvl] implement workaround for missing blender clamp control It's about 20% slower, but should at least work with every hardware. --- src/gallium/auxiliary/vl/vl_mc.c | 39 +++++++++++++++++++++++++++++++++------ src/gallium/auxiliary/vl/vl_mc.h | 3 ++- 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary/vl/vl_mc.h') diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index 3b665fafb7d..bd05205b52d 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -309,7 +309,8 @@ create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, v } static void * -create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) +create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert, + vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src flags; @@ -349,13 +350,14 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader f fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp); if (scale != 1.0f) - ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), + ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, scale), ureg_scalar(flags, TGSI_SWIZZLE_Z)); else - ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z)); - + + ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); @@ -415,6 +417,12 @@ init_pipe_state(struct vl_mc *r) r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend); if (!r->blend_add[i]) goto error_blend; + + blend.rt[0].rgb_func = PIPE_BLEND_REVERSE_SUBTRACT; + blend.rt[0].alpha_dst_factor = PIPE_BLEND_REVERSE_SUBTRACT; + r->blend_sub[i] = r->pipe->create_blend_state(r->pipe, &blend); + if (!r->blend_sub[i]) + goto error_blend; } memset(&rs_state, 0, sizeof(rs_state)); @@ -432,6 +440,9 @@ init_pipe_state(struct vl_mc *r) error_rs_state: error_blend: for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { + if (r->blend_sub[i]) + r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]); + if (r->blend_add[i]) r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); @@ -456,6 +467,7 @@ cleanup_pipe_state(struct vl_mc *r) for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); + r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]); } r->pipe->delete_rasterizer_state(r->pipe, r->rs_state); } @@ -493,11 +505,18 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, if (!renderer->fs_ref) goto error_fs_ref; - renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, fs_callback, callback_priv); + renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, false, fs_callback, callback_priv); if (!renderer->fs_ycbcr) goto error_fs_ycbcr; + renderer->fs_ycbcr_sub = create_ycbcr_frag_shader(renderer, scale, true, fs_callback, callback_priv); + if (!renderer->fs_ycbcr_sub) + goto error_fs_ycbcr_sub; + return true; + +error_fs_ycbcr_sub: + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr); error_fs_ycbcr: renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref); @@ -526,6 +545,7 @@ vl_mc_cleanup(struct vl_mc *renderer) renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr); renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref); renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr); + renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr_sub); } bool @@ -616,13 +636,14 @@ void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances) { struct vl_mc *renderer; + unsigned mask = 1 << component; assert(buffer); if (num_instances == 0) return; - prepare_pipe_4_rendering(buffer, 1 << component); + prepare_pipe_4_rendering(buffer, mask); renderer = buffer->renderer; @@ -630,4 +651,10 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr); util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); + + if (buffer->surface_cleared) { + renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_sub[mask]); + renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr_sub); + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); + } } diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h index 85ec69b3ce7..9fabf02a3ac 100644 --- a/src/gallium/auxiliary/vl/vl_mc.h +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -51,8 +51,9 @@ struct vl_mc void *blend_clear[VL_MC_NUM_BLENDERS]; void *blend_add[VL_MC_NUM_BLENDERS]; + void *blend_sub[VL_MC_NUM_BLENDERS]; void *vs_ref, *vs_ycbcr; - void *fs_ref, *fs_ycbcr; + void *fs_ref, *fs_ycbcr, *fs_ycbcr_sub; void *sampler_ref; }; -- cgit v1.2.3