diff options
author | Boyuan Zhang <[email protected]> | 2015-07-08 16:54:48 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-08-14 15:02:31 +0200 |
commit | 839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9 (patch) | |
tree | 9307491fc3189e31e067f31013d316ee91f15941 /src/gallium/drivers/radeon/radeon_uvd.c | |
parent | 0654a9ca17c17fe140f70d126c878a0ce4736b76 (diff) |
radeon/uvd: implement HEVC support
add context buffer to fix H265 uvd decode issue.
fix H265 corruption issue caused by incorrect assigned ref_pic_list.
v2: disable interlace for HEVC
add CZ sps flag workaround
fix coding style
Signed-off-by: Christian König <[email protected]>
Signed-off-by: Boyuan Zhang <[email protected]>
Reviewed-by: Leo Liu <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeon/radeon_uvd.c')
-rw-r--r-- | src/gallium/drivers/radeon/radeon_uvd.c | 242 |
1 files changed, 225 insertions, 17 deletions
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 375b5c06e12..16ee5410273 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -57,7 +57,7 @@ #define FB_BUFFER_OFFSET 0x1000 #define FB_BUFFER_SIZE 2048 -#define IT_SCALING_TABLE_SIZE 224 +#define IT_SCALING_TABLE_SIZE 992 /* UVD decoder representation */ struct ruvd_decoder { @@ -86,6 +86,7 @@ struct ruvd_decoder { struct rvid_buffer dpb; bool use_legacy; + struct rvid_buffer ctx; }; /* flush IB to the hardware */ @@ -124,6 +125,13 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1); } +/* do the codec needs an IT buffer ?*/ +static bool have_it(struct ruvd_decoder *dec) +{ + return dec->stream_type == RUVD_CODEC_H264_PERF || + dec->stream_type == RUVD_CODEC_H265; +} + /* map the next available message/feedback/itscaling buffer */ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) { @@ -139,7 +147,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); - if (dec->stream_type == RUVD_CODEC_H264_PERF) + if (have_it(dec)) dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); } @@ -159,8 +167,7 @@ static void send_msg_buf(struct ruvd_decoder *dec) dec->ws->buffer_unmap(buf->res->cs_buf); dec->msg = NULL; dec->fb = NULL; - if (dec->stream_type == RUVD_CODEC_H264_PERF) - dec->it = NULL; + dec->it = NULL; /* and send it to the hardware */ send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0, @@ -191,12 +198,35 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) case PIPE_VIDEO_FORMAT_MPEG4: return RUVD_CODEC_MPEG4; + case PIPE_VIDEO_FORMAT_HEVC: + return RUVD_CODEC_H265; + default: assert(0); return 0; } } +static unsigned calc_ctx_size(struct ruvd_decoder *dec) +{ + unsigned width_in_mb, height_in_mb, ctx_size; + + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024; + return ctx_size; +} + /* calculate size of reference picture buffer */ static unsigned calc_dpb_size(struct ruvd_decoder *dec) { @@ -270,6 +300,17 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) break; } + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + dpb_size = align((width * height * 3) / 2, 256) * max_references; + break; + case PIPE_VIDEO_FORMAT_VC1: // the firmware seems to allways assume a minimum of ref frames max_references = MAX2(NUM_VC1_REFS, max_references); @@ -319,6 +360,12 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) return dpb_size; } +/* free associated data in the video buffer callback */ +static void ruvd_destroy_associated_data(void *data) +{ + /* NOOP, since we only use an intptr */ +} + /* get h264 specific message bits */ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) { @@ -392,6 +439,11 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); + if (dec->stream_type == RUVD_CODEC_H264_PERF) { + memcpy(dec->it, result.scaling_list_4x4, 6*16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); + } + result.num_ref_frames = pic->num_ref_frames; result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; @@ -408,6 +460,151 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ return result; } +/* get h265 specific message bits */ +static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) +{ + struct ruvd_h265 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_idx = pic->CurrPicOrderCntVal; + result.curr_poc = pic->CurrPicOrderCntVal; + + vl_video_buffer_set_associated_data(target, &dec->base, + (void *)(uintptr_t)pic->CurrPicOrderCntVal, + &ruvd_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + /* TODO + result.highestTid; + result.isNonRef; + + IDRPicFlag; + RAPPicFlag; + NumPocTotalCurr; + NumShortTermPictureSliceHeaderBits; + NumLongTermPictureSliceHeaderBits; + + IsLongTerm[16]; + */ + + return result; +} + /* get vc1 specific message bits */ static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) { @@ -627,16 +824,12 @@ static void ruvd_destroy(struct pipe_video_codec *decoder) } rvid_destroy_buffer(&dec->dpb); + if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) + rvid_destroy_buffer(&dec->ctx); FREE(dec); } -/* free associated data in the video buffer callback */ -static void ruvd_destroy_associated_data(void *data) -{ - /* NOOP, since we only use an intptr */ -} - /** * start decoding of a new frame */ @@ -759,10 +952,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, switch (u_reduce_video_profile(picture->profile)) { case PIPE_VIDEO_FORMAT_MPEG4_AVC: dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); - if (dec->stream_type == RUVD_CODEC_H264_PERF) { - memcpy(dec->it, dec->msg->body.decode.codec.h264.scaling_list_4x4, 6*16); - memcpy((dec->it + 96), dec->msg->body.decode.codec.h264.scaling_list_8x8, 2*64); - } + break; + + case PIPE_VIDEO_FORMAT_HEVC: + dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); break; case PIPE_VIDEO_FORMAT_VC1: @@ -792,13 +985,17 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { + send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + } send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf, FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); - if (dec->stream_type == RUVD_CODEC_H264_PERF) + if (have_it(dec)) send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf, FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); set_reg(dec, RUVD_ENGINE_CNTL, 1); @@ -884,7 +1081,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, for (i = 0; i < NUM_BUFFERS; ++i) { unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); - if (dec->stream_type == RUVD_CODEC_H264_PERF) + if (have_it(dec)) msg_fb_it_size += IT_SCALING_TABLE_SIZE; if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], msg_fb_it_size, PIPE_USAGE_STAGING)) { @@ -911,6 +1108,15 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, rvid_clear_buffer(context, &dec->dpb); + if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) { + unsigned ctx_size = calc_ctx_size(dec); + if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + rvid_clear_buffer(context, &dec->ctx); + } + map_msg_fb_it_buf(dec); dec->msg->size = sizeof(*dec->msg); dec->msg->msg_type = RUVD_MSG_CREATE; @@ -918,7 +1124,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, dec->msg->body.create.stream_type = dec->stream_type; dec->msg->body.create.width_in_samples = dec->base.width; dec->msg->body.create.height_in_samples = dec->base.height; - dec->msg->body.create.dpb_size = dec->dpb.res->buf->size; + dec->msg->body.create.dpb_size = dpb_size; send_msg_buf(dec); flush(dec); next_buffer(dec); @@ -934,6 +1140,8 @@ error: } rvid_destroy_buffer(&dec->dpb); + if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) + rvid_destroy_buffer(&dec->ctx); FREE(dec); |