diff options
author | Pierre-Eric Pelloux-Prayer <[email protected]> | 2020-03-27 20:42:29 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-30 11:05:52 +0000 |
commit | 716a065ac05b2347054077aea389d3c877585b6f (patch) | |
tree | 2b157eedf5696a29ceebe5ee1ba58fa33d212b13 | |
parent | d7008fe46a8f689ce4ee2b14b61dc39baebccaa8 (diff) |
radeon: switch to 3-spaces style
For clang-format config see the previous commit.
Reviewed-by: Marek Olšák <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4319>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4319>
21 files changed, 8970 insertions, 9286 deletions
diff --git a/src/gallium/drivers/radeon/.editorconfig b/src/gallium/drivers/radeon/.editorconfig deleted file mode 100644 index cc8e11ffd65..00000000000 --- a/src/gallium/drivers/radeon/.editorconfig +++ /dev/null @@ -1,2 +0,0 @@ -[*.{c,h}] -indent_style = tab diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index f570b75dfaa..41f900076e0 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -25,809 +25,808 @@ * **************************************************************************/ -#include <sys/types.h> -#include <assert.h> -#include <errno.h> -#include <unistd.h> -#include <stdio.h> +#include "radeon_uvd.h" #include "pipe/p_video_codec.h" - +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" +#include <sys/types.h> -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_uvd.h" +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> #define NUM_BUFFERS 4 #define NUM_MPEG2_REFS 6 -#define NUM_H264_REFS 17 -#define NUM_VC1_REFS 5 +#define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 -#define FB_BUFFER_OFFSET 0x1000 -#define FB_BUFFER_SIZE 2048 -#define FB_BUFFER_SIZE_TONGA (2048 * 64) -#define IT_SCALING_TABLE_SIZE 992 +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define IT_SCALING_TABLE_SIZE 992 #define UVD_SESSION_CONTEXT_SIZE (128 * 1024) /* UVD decoder representation */ struct ruvd_decoder { - struct pipe_video_codec base; - - ruvd_set_dtb set_dtb; - - unsigned stream_handle; - unsigned stream_type; - unsigned frame_number; - - struct pipe_screen *screen; - struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; - - unsigned cur_buffer; - - struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; - struct ruvd_msg *msg; - uint32_t *fb; - unsigned fb_size; - uint8_t *it; - - struct rvid_buffer bs_buffers[NUM_BUFFERS]; - void* bs_ptr; - unsigned bs_size; - - struct rvid_buffer dpb; - bool use_legacy; - struct rvid_buffer ctx; - struct rvid_buffer sessionctx; - struct { - unsigned data0; - unsigned data1; - unsigned cmd; - unsigned cntl; - } reg; - - void *render_pic_list[16]; + struct pipe_video_codec base; + + ruvd_set_dtb set_dtb; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + unsigned cur_buffer; + + struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; + struct ruvd_msg *msg; + uint32_t *fb; + unsigned fb_size; + uint8_t *it; + + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + void *bs_ptr; + unsigned bs_size; + + struct rvid_buffer dpb; + bool use_legacy; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + struct { + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned cntl; + } reg; + + void *render_pic_list[16]; }; /* flush IB to the hardware */ static int flush(struct ruvd_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ -static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - int reloc_idx; - - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - if (!dec->use_legacy) { - uint64_t addr; - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; - set_reg(dec, dec->reg.data0, addr); - set_reg(dec, dec->reg.data1, addr >> 32); - } else { - off += dec->ws->buffer_get_reloc_offset(buf); - set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); - set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); - } - set_reg(dec, dec->reg.cmd, cmd << 1); + int reloc_idx; + + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + if (!dec->use_legacy) { + uint64_t addr; + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; + set_reg(dec, dec->reg.data0, addr); + set_reg(dec, dec->reg.data1, addr >> 32); + } else { + off += dec->ws->buffer_get_reloc_offset(buf); + set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); + set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); + } + set_reg(dec, dec->reg.cmd, cmd << 1); } /* do the codec needs an IT buffer ?*/ static bool have_it(struct ruvd_decoder *dec) { - return dec->stream_type == RUVD_CODEC_H264_PERF || - dec->stream_type == RUVD_CODEC_H265; + return dec->stream_type == RUVD_CODEC_H264_PERF || dec->stream_type == RUVD_CODEC_H265; } /* map the next available message/feedback/itscaling buffer */ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) { - struct rvid_buffer* buf; - uint8_t *ptr; + struct rvid_buffer *buf; + uint8_t *ptr; - /* grab the current message/feedback buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + /* and map it for CPU access */ + ptr = + dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - /* calc buffer offsets */ - dec->msg = (struct ruvd_msg *)ptr; - memset(dec->msg, 0, sizeof(*dec->msg)); + /* calc buffer offsets */ + dec->msg = (struct ruvd_msg *)ptr; + memset(dec->msg, 0, sizeof(*dec->msg)); - dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); - if (have_it(dec)) - dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); } /* unmap and send a message command to the VCPU */ static void send_msg_buf(struct ruvd_decoder *dec) { - struct rvid_buffer* buf; - - /* ignore the request if message/feedback buffer isn't mapped */ - if (!dec->msg || !dec->fb) - return; + struct rvid_buffer *buf; - /* grab the current message buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; - /* unmap the buffer */ - dec->ws->buffer_unmap(buf->res->buf); - dec->msg = NULL; - dec->fb = NULL; - dec->it = NULL; + /* grab the current message buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; - if (dec->sessionctx.res) - send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, - dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, - RADEON_DOMAIN_VRAM); + if (dec->sessionctx.res) + send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - /* and send it to the hardware */ - send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + /* and send it to the hardware */ + send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); } /* cycle to the next set of buffers */ static void next_buffer(struct ruvd_decoder *dec) { - ++dec->cur_buffer; - dec->cur_buffer %= NUM_BUFFERS; + ++dec->cur_buffer; + dec->cur_buffer %= NUM_BUFFERS; } /* convert the profile into something UVD understands */ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) { - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - return (family >= CHIP_TONGA) ? - RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + return (family >= CHIP_TONGA) ? RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; - case PIPE_VIDEO_FORMAT_VC1: - return RUVD_CODEC_VC1; + case PIPE_VIDEO_FORMAT_VC1: + return RUVD_CODEC_VC1; - case PIPE_VIDEO_FORMAT_MPEG12: - return RUVD_CODEC_MPEG2; + case PIPE_VIDEO_FORMAT_MPEG12: + return RUVD_CODEC_MPEG2; - case PIPE_VIDEO_FORMAT_MPEG4: - return RUVD_CODEC_MPEG4; + case PIPE_VIDEO_FORMAT_MPEG4: + return RUVD_CODEC_MPEG4; - case PIPE_VIDEO_FORMAT_HEVC: - return RUVD_CODEC_H265; + case PIPE_VIDEO_FORMAT_HEVC: + return RUVD_CODEC_H265; - case PIPE_VIDEO_FORMAT_JPEG: - return RUVD_CODEC_MJPEG; + case PIPE_VIDEO_FORMAT_JPEG: + return RUVD_CODEC_MJPEG; - default: - assert(0); - return 0; - } + default: + assert(0); + return 0; + } } static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec) { - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - unsigned max_references = dec->base.max_references + 1; - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - if (!dec->use_legacy) { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); - } else { - // the firmware seems to always assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - // macroblock context buffer - ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); - } - - return ctx_size; + unsigned width_in_mb, height_in_mb, ctx_size; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); + } else { + // the firmware seems to always assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // macroblock context buffer + ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); + } + + return ctx_size; } static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) { - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - width = align (width, 16); - height = align (height, 16); - return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; + width = align(width, 16); + height = align(height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } -static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) +static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, + struct pipe_h265_picture_desc *pic) { - unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; - unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; - unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = + (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); - context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); - max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned)ceil(height * 8 / 2048.0); - cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; - db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); - return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; } static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) { - if (((struct si_screen*)dec->screen)->info.family < CHIP_VEGA10) - return 16; - else - return 32; + if (((struct si_screen *)dec->screen)->info.family < CHIP_VEGA10) + return 16; + else + return 32; } /* calculate size of reference picture buffer */ static unsigned calc_dpb_size(struct ruvd_decoder *dec) { - unsigned width_in_mb, height_in_mb, image_size, dpb_size; - - // always align them to MB size for dpb calculation - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - // always one more for currently decoded picture - unsigned max_references = dec->base.max_references + 1; - - // aligned size of a single frame - image_size = align(width, get_db_pitch_alignment(dec)) * height; - image_size += image_size / 2; - image_size = align(image_size, 1024); - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - if (!dec->use_legacy) { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned alignment = 64, num_dpb_buffer; - - if (dec->stream_type == RUVD_CODEC_H264_PERF) - alignment = 256; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { - dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); - dpb_size += align(width_in_mb * height_in_mb * 32, alignment); - } - } else { - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - // reference picture buffer - dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { - // macroblock context buffer - dpb_size += width_in_mb * height_in_mb * max_references * 192; - // IT surface buffer - dpb_size += width_in_mb * height_in_mb * 32; - } - } - break; - } - - case PIPE_VIDEO_FORMAT_HEVC: - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); - - width = align (width, 16); - height = align (height, 16); - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references; - else - dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references; - break; - - case PIPE_VIDEO_FORMAT_VC1: - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_VC1_REFS, max_references); - - // reference picture buffer - dpb_size = image_size * max_references; - - // CONTEXT_BUFFER - dpb_size += width_in_mb * height_in_mb * 128; - - // IT surface buffer - dpb_size += width_in_mb * 64; - - // DB surface buffer - dpb_size += width_in_mb * 128; - - // BP - dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - // reference picture buffer, must be big enough for all frames - dpb_size = image_size * NUM_MPEG2_REFS; - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - // reference picture buffer - dpb_size = image_size * max_references; - - // CM - dpb_size += width_in_mb * height_in_mb * 64; - - // IT surface buffer - dpb_size += align(width_in_mb * height_in_mb * 32, 64); - - dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - dpb_size = 0; - break; - - default: - // something is missing here - assert(0); - - // at least use a sane default value - dpb_size = 32 * 1024 * 1024; - break; - } - return dpb_size; + unsigned width_in_mb, height_in_mb, image_size, dpb_size; + + // always align them to MB size for dpb calculation + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + // always one more for currently decoded picture + unsigned max_references = dec->base.max_references + 1; + + // aligned size of a single frame + image_size = align(width, get_db_pitch_alignment(dec)) * height; + image_size += image_size / 2; + image_size = align(image_size, 1024); + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned alignment = 64, num_dpb_buffer; + + if (dec->stream_type == RUVD_CODEC_H264_PERF) + alignment = 256; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) { + dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); + dpb_size += align(width_in_mb * height_in_mb * 32, alignment); + } + } else { + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // reference picture buffer + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) { + // macroblock context buffer + dpb_size += width_in_mb * height_in_mb * max_references * 192; + // IT surface buffer + dpb_size += width_in_mb * height_in_mb * 32; + } + } + break; + } + + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align(width, 16); + height = align(height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * + max_references; + else + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * + max_references; + break; + + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + + // reference picture buffer + dpb_size = image_size * max_references; + + // CONTEXT_BUFFER + dpb_size += width_in_mb * height_in_mb * 128; + + // IT surface buffer + dpb_size += width_in_mb * 64; + + // DB surface buffer + dpb_size += width_in_mb * 128; + + // BP + dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + // reference picture buffer, must be big enough for all frames + dpb_size = image_size * NUM_MPEG2_REFS; + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + // reference picture buffer + dpb_size = image_size * max_references; + + // CM + dpb_size += width_in_mb * height_in_mb * 64; + + // IT surface buffer + dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + + default: + // something is missing here + assert(0); + + // at least use a sane default value + dpb_size = 32 * 1024 * 1024; + break; + } + return dpb_size; } /* free associated data in the video buffer callback */ static void ruvd_destroy_associated_data(void *data) { - /* NOOP, since we only use an intptr */ + /* NOOP, since we only use an intptr */ } /* get h264 specific message bits */ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) { - struct ruvd_h264 result; - - memset(&result, 0, sizeof(result)); - switch (pic->base.profile) { - case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: - case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: - result.profile = RUVD_H264_PROFILE_BASELINE; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: - result.profile = RUVD_H264_PROFILE_MAIN; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - result.profile = RUVD_H264_PROFILE_HIGH; - break; - - default: - assert(0); - break; - } - - result.level = dec->base.level; - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; - result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; - result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; - result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; - - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; - result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - - switch (dec->base.chroma_format) { - case PIPE_VIDEO_CHROMA_FORMAT_NONE: - /* TODO: assert? */ - break; - case PIPE_VIDEO_CHROMA_FORMAT_400: - result.chroma_format = 0; - break; - case PIPE_VIDEO_CHROMA_FORMAT_420: - result.chroma_format = 1; - break; - case PIPE_VIDEO_CHROMA_FORMAT_422: - result.chroma_format = 2; - break; - case PIPE_VIDEO_CHROMA_FORMAT_444: - result.chroma_format = 3; - break; - } - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; - result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; - result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; - result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; - result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; - result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; - - result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; - result.slice_group_map_type = pic->pps->slice_group_map_type; - result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; - result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; - result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; - result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; - - memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); - memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); - - if (dec->stream_type == RUVD_CODEC_H264_PERF) { - memcpy(dec->it, result.scaling_list_4x4, 6*16); - memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); - } - - result.num_ref_frames = pic->num_ref_frames; - - result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; - result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; - - result.frame_num = pic->frame_num; - memcpy(result.frame_num_list, pic->frame_num_list, 4*16); - result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; - result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; - memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); - - result.decoded_pic_idx = pic->frame_num; - - return result; + struct ruvd_h264 result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + result.profile = RUVD_H264_PROFILE_BASELINE; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + result.profile = RUVD_H264_PROFILE_MAIN; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + result.profile = RUVD_H264_PROFILE_HIGH; + break; + + default: + assert(0); + break; + } + + result.level = dec->base.level; + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? */ + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64); + + if (dec->stream_type == RUVD_CODEC_H264_PERF) { + memcpy(dec->it, result.scaling_list_4x4, 6 * 16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64); + } + + result.num_ref_frames = pic->num_ref_frames; + + result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + result.frame_num = pic->frame_num; + memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16); + result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; + result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; + memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2); + + result.decoded_pic_idx = pic->frame_num; + + return result; } /* get h265 specific message bits */ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, - struct pipe_h265_picture_desc *pic) + struct pipe_h265_picture_desc *pic) { - struct ruvd_h265 result; - unsigned i, j; - - memset(&result, 0, sizeof(result)); - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; - result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; - result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; - result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; - result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; - result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; - result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; - result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; - result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) - result.sps_info_flags |= 1 << 9; - if (pic->UseRefPicList == true) - result.sps_info_flags |= 1 << 10; - - result.chroma_format = pic->pps->sps->chroma_format_idc; - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; - result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; - result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; - result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; - result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; - result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; - result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; - result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; - result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; - result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; - result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; - result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; - result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; - result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; - result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; - result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; - result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; - result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; - result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; - result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; - result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; - result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; - result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; - result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; - //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? - - result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; - result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; - result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; - result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; - result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; - result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; - result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; - result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; - result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; - result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; - result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; - result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; - result.init_qp_minus26 = pic->pps->init_qp_minus26; - - for (i = 0; i < 19; ++i) - result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; - - for (i = 0; i < 21; ++i) - result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; - - result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; - result.curr_poc = pic->CurrPicOrderCntVal; - - for (i = 0 ; i < 16 ; i++) { - for (j = 0; (pic->ref[j] != NULL) && (j < 16) ; j++) { - if (dec->render_pic_list[i] == pic->ref[j]) - break; - if (j == 15) - dec->render_pic_list[i] = NULL; - else if (pic->ref[j+1] == NULL) - dec->render_pic_list[i] = NULL; - } - } - for (i = 0 ; i < 16 ; i++) { - if (dec->render_pic_list[i] == NULL) { - dec->render_pic_list[i] = target; - result.curr_idx = i; - break; - } - } - - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)result.curr_idx, - &ruvd_destroy_associated_data); - - for (i = 0; i < 16; ++i) { - struct pipe_video_buffer *ref = pic->ref[i]; - uintptr_t ref_pic = 0; - - result.poc_list[i] = pic->PicOrderCntVal[i]; - - if (ref) - ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - else - ref_pic = 0x7F; - result.ref_pic_list[i] = ref_pic; - } - - for (i = 0; i < 8; ++i) { - result.ref_pic_set_st_curr_before[i] = 0xFF; - result.ref_pic_set_st_curr_after[i] = 0xFF; - result.ref_pic_set_lt_curr[i] = 0xFF; - } - - for (i = 0; i < pic->NumPocStCurrBefore; ++i) - result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; - - for (i = 0; i < pic->NumPocStCurrAfter; ++i) - result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; - - for (i = 0; i < pic->NumPocLtCurr; ++i) - result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; - - for (i = 0; i < 6; ++i) - result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; - - for (i = 0; i < 2; ++i) - result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; - - memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); - memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); - memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); - memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); - - for (i = 0 ; i < 2 ; i++) { - for (j = 0 ; j < 15 ; j++) - result.direct_reflist[i][j] = pic->RefPicList[i][j]; - } - - if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - if (target->buffer_format == PIPE_FORMAT_P010 || - target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.luma_10to8 = 5; - result.chroma_10to8 = 5; - result.sclr_luma10to8 = 4; - result.sclr_chroma10to8 = 4; - } - } - - /* TODO - result.highestTid; - result.isNonRef; - - IDRPicFlag; - RAPPicFlag; - NumPocTotalCurr; - NumShortTermPictureSliceHeaderBits; - NumLongTermPictureSliceHeaderBits; - - IsLongTerm[16]; - */ - - return result; + struct ruvd_h265 result; + unsigned i, j; + + memset(&result, 0, sizeof(result)); + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + // result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_poc = pic->CurrPicOrderCntVal; + + for (i = 0; i < 16; i++) { + for (j = 0; (pic->ref[j] != NULL) && (j < 16); j++) { + if (dec->render_pic_list[i] == pic->ref[j]) + break; + if (j == 15) + dec->render_pic_list[i] = NULL; + else if (pic->ref[j + 1] == NULL) + dec->render_pic_list[i] = NULL; + } + } + for (i = 0; i < 16; i++) { + if (dec->render_pic_list[i] == NULL) { + dec->render_pic_list[i] = target; + result.curr_idx = i; + break; + } + } + + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx, + &ruvd_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 15; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.sclr_luma10to8 = 4; + result.sclr_chroma10to8 = 4; + } + } + + /* TODO + result.highestTid; + result.isNonRef; + + IDRPicFlag; + RAPPicFlag; + NumPocTotalCurr; + NumShortTermPictureSliceHeaderBits; + NumLongTermPictureSliceHeaderBits; + + IsLongTerm[16]; + */ + + return result; } /* get vc1 specific message bits */ static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) { - struct ruvd_vc1 result; - - memset(&result, 0, sizeof(result)); - - switch(pic->base.profile) { - case PIPE_VIDEO_PROFILE_VC1_SIMPLE: - result.profile = RUVD_VC1_PROFILE_SIMPLE; - result.level = 1; - break; - - case PIPE_VIDEO_PROFILE_VC1_MAIN: - result.profile = RUVD_VC1_PROFILE_MAIN; - result.level = 2; - break; - - case PIPE_VIDEO_PROFILE_VC1_ADVANCED: - result.profile = RUVD_VC1_PROFILE_ADVANCED; - result.level = 4; - break; - - default: - assert(0); - } - - /* fields common for all profiles */ - result.sps_info_flags |= pic->postprocflag << 7; - result.sps_info_flags |= pic->pulldown << 6; - result.sps_info_flags |= pic->interlace << 5; - result.sps_info_flags |= pic->tfcntrflag << 4; - result.sps_info_flags |= pic->finterpflag << 3; - result.sps_info_flags |= pic->psf << 1; - - result.pps_info_flags |= pic->range_mapy_flag << 31; - result.pps_info_flags |= pic->range_mapy << 28; - result.pps_info_flags |= pic->range_mapuv_flag << 27; - result.pps_info_flags |= pic->range_mapuv << 24; - result.pps_info_flags |= pic->multires << 21; - result.pps_info_flags |= pic->maxbframes << 16; - result.pps_info_flags |= pic->overlap << 11; - result.pps_info_flags |= pic->quantizer << 9; - result.pps_info_flags |= pic->panscan_flag << 7; - result.pps_info_flags |= pic->refdist_flag << 6; - result.pps_info_flags |= pic->vstransform << 0; - - /* some fields only apply to main/advanced profile */ - if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { - result.pps_info_flags |= pic->syncmarker << 20; - result.pps_info_flags |= pic->rangered << 19; - result.pps_info_flags |= pic->loopfilter << 5; - result.pps_info_flags |= pic->fastuvmc << 4; - result.pps_info_flags |= pic->extended_mv << 3; - result.pps_info_flags |= pic->extended_dmv << 8; - result.pps_info_flags |= pic->dquant << 1; - } - - result.chroma_format = 1; + struct ruvd_vc1 result; + + memset(&result, 0, sizeof(result)); + + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + result.profile = RUVD_VC1_PROFILE_SIMPLE; + result.level = 1; + break; + + case PIPE_VIDEO_PROFILE_VC1_MAIN: + result.profile = RUVD_VC1_PROFILE_MAIN; + result.level = 2; + break; + + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + result.profile = RUVD_VC1_PROFILE_ADVANCED; + result.level = 4; + break; + + default: + assert(0); + } + + /* fields common for all profiles */ + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; + + /* some fields only apply to main/advanced profile */ + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { + result.pps_info_flags |= pic->syncmarker << 20; + result.pps_info_flags |= pic->rangered << 19; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; + } + + result.chroma_format = 1; #if 0 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) @@ -838,132 +837,132 @@ uint8_t deblockEnable uint8_t pquant #endif - return result; + return result; } /* extract the frame number from a referenced video buffer */ static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) { - uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; - uint32_t max = MAX2(dec->frame_number, 1) - 1; - uintptr_t frame; + uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; + uintptr_t frame; - /* seems to be the most sane fallback */ - if (!ref) - return max; + /* seems to be the most sane fallback */ + if (!ref) + return max; - /* get the frame number from the associated data */ - frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + /* get the frame number from the associated data */ + frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - /* limit the frame number to a valid range */ - return MAX2(MIN2(frame, max), min); + /* limit the frame number to a valid range */ + return MAX2(MIN2(frame, max), min); } /* get mpeg2 specific msg bits */ static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, - struct pipe_mpeg12_picture_desc *pic) + struct pipe_mpeg12_picture_desc *pic) { - const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; - struct ruvd_mpeg2 result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - for (i = 0; i < 2; ++i) - result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); - - if(pic->intra_matrix) { - result.load_intra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; - } - } - if(pic->non_intra_matrix) { - result.load_nonintra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; - } - } - - result.profile_and_level_indication = 0; - result.chroma_format = 0x1; - - result.picture_coding_type = pic->picture_coding_type; - result.f_code[0][0] = pic->f_code[0][0] + 1; - result.f_code[0][1] = pic->f_code[0][1] + 1; - result.f_code[1][0] = pic->f_code[1][0] + 1; - result.f_code[1][1] = pic->f_code[1][1] + 1; - result.intra_dc_precision = pic->intra_dc_precision; - result.pic_structure = pic->picture_structure; - result.top_field_first = pic->top_field_first; - result.frame_pred_frame_dct = pic->frame_pred_frame_dct; - result.concealment_motion_vectors = pic->concealment_motion_vectors; - result.q_scale_type = pic->q_scale_type; - result.intra_vlc_format = pic->intra_vlc_format; - result.alternate_scan = pic->alternate_scan; - - return result; + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + struct ruvd_mpeg2 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + if (pic->intra_matrix) { + result.load_intra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + } + } + if (pic->non_intra_matrix) { + result.load_nonintra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } + } + + result.profile_and_level_indication = 0; + result.chroma_format = 0x1; + + result.picture_coding_type = pic->picture_coding_type; + result.f_code[0][0] = pic->f_code[0][0] + 1; + result.f_code[0][1] = pic->f_code[0][1] + 1; + result.f_code[1][0] = pic->f_code[1][0] + 1; + result.f_code[1][1] = pic->f_code[1][1] + 1; + result.intra_dc_precision = pic->intra_dc_precision; + result.pic_structure = pic->picture_structure; + result.top_field_first = pic->top_field_first; + result.frame_pred_frame_dct = pic->frame_pred_frame_dct; + result.concealment_motion_vectors = pic->concealment_motion_vectors; + result.q_scale_type = pic->q_scale_type; + result.intra_vlc_format = pic->intra_vlc_format; + result.alternate_scan = pic->alternate_scan; + + return result; } /* get mpeg4 specific msg bits */ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, - struct pipe_mpeg4_picture_desc *pic) + struct pipe_mpeg4_picture_desc *pic) { - struct ruvd_mpeg4 result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - for (i = 0; i < 2; ++i) - result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); - - result.variant_type = 0; - result.profile_and_level_indication = 0xF0; // ASP Level0 - - result.video_object_layer_verid = 0x5; // advanced simple - result.video_object_layer_shape = 0x0; // rectangular - - result.video_object_layer_width = dec->base.width; - result.video_object_layer_height = dec->base.height; - - result.vop_time_increment_resolution = pic->vop_time_increment_resolution; - - result.flags |= pic->short_video_header << 0; - //result.flags |= obmc_disable << 1; - result.flags |= pic->interlaced << 2; - result.flags |= 1 << 3; // load_intra_quant_mat - result.flags |= 1 << 4; // load_nonintra_quant_mat - result.flags |= pic->quarter_sample << 5; - result.flags |= 1 << 6; // complexity_estimation_disable - result.flags |= pic->resync_marker_disable << 7; - //result.flags |= data_partitioned << 8; - //result.flags |= reversible_vlc << 9; - result.flags |= 0 << 10; // newpred_enable - result.flags |= 0 << 11; // reduced_resolution_vop_enable - //result.flags |= scalability << 12; - //result.flags |= is_object_layer_identifier << 13; - //result.flags |= fixed_vop_rate << 14; - //result.flags |= newpred_segment_type << 15; - - result.quant_type = pic->quant_type; - - for (i = 0; i < 64; ++i) { - result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; - result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; - } - - /* - int32_t trd [2] - int32_t trb [2] - uint8_t vop_coding_type - uint8_t vop_fcode_forward - uint8_t vop_fcode_backward - uint8_t rounding_control - uint8_t alternate_vertical_scan_flag - uint8_t top_field_first - */ - - return result; + struct ruvd_mpeg4 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + result.variant_type = 0; + result.profile_and_level_indication = 0xF0; // ASP Level0 + + result.video_object_layer_verid = 0x5; // advanced simple + result.video_object_layer_shape = 0x0; // rectangular + + result.video_object_layer_width = dec->base.width; + result.video_object_layer_height = dec->base.height; + + result.vop_time_increment_resolution = pic->vop_time_increment_resolution; + + result.flags |= pic->short_video_header << 0; + // result.flags |= obmc_disable << 1; + result.flags |= pic->interlaced << 2; + result.flags |= 1 << 3; // load_intra_quant_mat + result.flags |= 1 << 4; // load_nonintra_quant_mat + result.flags |= pic->quarter_sample << 5; + result.flags |= 1 << 6; // complexity_estimation_disable + result.flags |= pic->resync_marker_disable << 7; + // result.flags |= data_partitioned << 8; + // result.flags |= reversible_vlc << 9; + result.flags |= 0 << 10; // newpred_enable + result.flags |= 0 << 11; // reduced_resolution_vop_enable + // result.flags |= scalability << 12; + // result.flags |= is_object_layer_identifier << 13; + // result.flags |= fixed_vop_rate << 14; + // result.flags |= newpred_segment_type << 15; + + result.quant_type = pic->quant_type; + + for (i = 0; i < 64; ++i) { + result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; + result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; + } + + /* + int32_t trd [2] + int32_t trb [2] + uint8_t vop_coding_type + uint8_t vop_fcode_forward + uint8_t vop_fcode_backward + uint8_t rounding_control + uint8_t alternate_vertical_scan_flag + uint8_t top_field_first + */ + + return result; } /** @@ -971,237 +970,236 @@ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, */ static void ruvd_destroy(struct pipe_video_codec *decoder) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - unsigned i; + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + unsigned i; - assert(decoder); + assert(decoder); - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_DESTROY; - dec->msg->stream_handle = dec->stream_handle; - send_msg_buf(dec); + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DESTROY; + dec->msg->stream_handle = dec->stream_handle; + send_msg_buf(dec); - flush(dec, 0); + flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); } /** * start decoding of a new frame */ -static void ruvd_begin_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - uintptr_t frame; + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + uintptr_t frame; - assert(decoder); + assert(decoder); - frame = ++dec->frame_number; - vl_video_buffer_set_associated_data(target, decoder, (void *)frame, - &ruvd_destroy_associated_data); + frame = ++dec->frame_number; + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + &ruvd_destroy_associated_data); - dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map( - dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + dec->bs_size = 0; + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** * decode a macroblock */ static void ruvd_decode_macroblock(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - const struct pipe_macroblock *macroblocks, - unsigned num_macroblocks) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) { - /* not supported (yet) */ - assert(0); + /* not supported (yet) */ + assert(0); } /** * decode a bitstream */ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - unsigned num_buffers, - const void * const *buffers, - const unsigned *sizes) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, unsigned num_buffers, + const void *const *buffers, const unsigned *sizes) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - unsigned i; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - for (i = 0; i < num_buffers; ++i) { - struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; - unsigned new_size = dec->bs_size + sizes[i]; - - if (new_size > buf->res->buf->size) { - dec->ws->buffer_unmap(buf->res->buf); - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { - RVID_ERR("Can't resize bitstream buffer!"); - return; - } - - dec->bs_ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dec->bs_ptr) - return; - - dec->bs_ptr += dec->bs_size; - } - - memcpy(dec->bs_ptr, buffers[i], sizes[i]); - dec->bs_size += sizes[i]; - dec->bs_ptr += sizes[i]; - } + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + unsigned i; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + for (i = 0; i < num_buffers; ++i) { + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + unsigned new_size = dec->bs_size + sizes[i]; + + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); + return; + } + + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dec->bs_ptr) + return; + + dec->bs_ptr += dec->bs_size; + } + + memcpy(dec->bs_ptr, buffers[i], sizes[i]); + dec->bs_size += sizes[i]; + dec->bs_ptr += sizes[i]; + } } /** * end decoding of the current frame */ -static void ruvd_end_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void ruvd_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - struct pb_buffer *dt; - struct rvid_buffer *msg_fb_it_buf, *bs_buf; - unsigned bs_size; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - bs_buf = &dec->bs_buffers[dec->cur_buffer]; - - bs_size = align(dec->bs_size, 128); - memset(dec->bs_ptr, 0, bs_size - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->res->buf); - - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_DECODE; - dec->msg->stream_handle = dec->stream_handle; - dec->msg->status_report_feedback_number = dec->frame_number; - - dec->msg->body.decode.stream_type = dec->stream_type; - dec->msg->body.decode.decode_flags = 0x1; - dec->msg->body.decode.width_in_samples = dec->base.width; - dec->msg->body.decode.height_in_samples = dec->base.height; - - if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || - (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { - dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16; - dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16; - } - - if (dec->dpb.res) - dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; - dec->msg->body.decode.bsd_size = bs_size; - dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); - - if (dec->stream_type == RUVD_CODEC_H264_PERF && - ((struct si_screen*)dec->screen)->info.family >= CHIP_POLARIS10) - dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - - dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); - if (((struct si_screen*)dec->screen)->info.family >= CHIP_STONEY) - dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; - - switch (u_reduce_video_profile(picture->profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_HEVC: - dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); - if (dec->ctx.res == NULL) { - unsigned ctx_size; - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture); - else - ctx_size = calc_ctx_size_h265_main(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - } - si_vid_clear_buffer(decoder->context, &dec->ctx); - } - - if (dec->ctx.res) - dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - break; - - case PIPE_VIDEO_FORMAT_VC1: - dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - break; - - default: - assert(0); - return; - } - - dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; - dec->msg->body.decode.extension_support = 0x1; - - /* set at least the feedback buffer size */ - dec->fb[0] = dec->fb_size; - - send_msg_buf(dec); - - if (dec->dpb.res) - send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - - if (dec->ctx.res) - send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, - FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); - if (have_it(dec)) - send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, - FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - set_reg(dec, dec->reg.cntl, 1); - - flush(dec, PIPE_FLUSH_ASYNC); - next_buffer(dec); + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_buf, *bs_buf; + unsigned bs_size; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + bs_size = align(dec->bs_size, 128); + memset(dec->bs_ptr, 0, bs_size - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DECODE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->status_report_feedback_number = dec->frame_number; + + dec->msg->body.decode.stream_type = dec->stream_type; + dec->msg->body.decode.decode_flags = 0x1; + dec->msg->body.decode.width_in_samples = dec->base.width; + dec->msg->body.decode.height_in_samples = dec->base.height; + + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + dec->msg->body.decode.width_in_samples = + align(dec->msg->body.decode.width_in_samples, 16) / 16; + dec->msg->body.decode.height_in_samples = + align(dec->msg->body.decode.height_in_samples, 16) / 16; + } + + if (dec->dpb.res) + dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; + dec->msg->body.decode.bsd_size = bs_size; + dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); + + if (dec->stream_type == RUVD_CODEC_H264_PERF && + ((struct si_screen *)dec->screen)->info.family >= CHIP_POLARIS10) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; + + dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); + if (((struct si_screen *)dec->screen)->info.family >= CHIP_STONEY) + dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; + + switch (u_reduce_video_profile(picture->profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + dec->msg->body.decode.codec.h264 = + get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_HEVC: + dec->msg->body.decode.codec.h265 = + get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture); + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + } + si_vid_clear_buffer(decoder->context, &dec->ctx); + } + + if (dec->ctx.res) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; + break; + + case PIPE_VIDEO_FORMAT_VC1: + dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + dec->msg->body.decode.codec.mpeg2 = + get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + dec->msg->body.decode.codec.mpeg4 = + get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + break; + + default: + assert(0); + return; + } + + dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; + dec->msg->body.decode.extension_support = 0x1; + + /* set at least the feedback buffer size */ + dec->fb[0] = dec->fb_size; + + send_msg_buf(dec); + + if (dec->dpb.res) + send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + + if (dec->ctx.res) + send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ, + RADEON_DOMAIN_GTT); + send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, FB_BUFFER_OFFSET, + RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + set_reg(dec, dec->reg.cntl, 1); + + flush(dec, PIPE_FLUSH_ASYNC); + next_buffer(dec); } /** @@ -1215,276 +1213,290 @@ static void ruvd_flush(struct pipe_video_codec *decoder) * create and UVD decoder */ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - ruvd_set_dtb set_dtb) + const struct pipe_video_codec *templ, + ruvd_set_dtb set_dtb) { - struct si_context *sctx = (struct si_context*)context; - struct radeon_winsys *ws = sctx->ws; - unsigned dpb_size; - unsigned width = templ->width, height = templ->height; - unsigned bs_buf_size; - struct ruvd_decoder *dec; - int r, i; - - switch(u_reduce_video_profile(templ->profile)) { - case PIPE_VIDEO_FORMAT_MPEG12: - if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - return vl_create_mpeg12_decoder(context, templ); - - /* fall through */ - case PIPE_VIDEO_FORMAT_MPEG4: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - break; - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - break; - - default: - break; - } - - - dec = CALLOC_STRUCT(ruvd_decoder); - - if (!dec) - return NULL; - - if (!sctx->screen->info.is_amdgpu) - dec->use_legacy = true; - - dec->base = *templ; - dec->base.context = context; - dec->base.width = width; - dec->base.height = height; - - dec->base.destroy = ruvd_destroy; - dec->base.begin_frame = ruvd_begin_frame; - dec->base.decode_macroblock = ruvd_decode_macroblock; - dec->base.decode_bitstream = ruvd_decode_bitstream; - dec->base.end_frame = ruvd_end_frame; - dec->base.flush = ruvd_flush; - - dec->stream_type = profile2stream_type(dec, sctx->family); - dec->set_dtb = set_dtb; - dec->stream_handle = si_vid_alloc_stream_handle(); - dec->screen = context->screen; - dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); - if (!dec->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - for (i = 0; i < 16; i++) - dec->render_pic_list[i] = NULL; - dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : - FB_BUFFER_SIZE; - bs_buf_size = width * height * (512 / (16 * 16)); - for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; - STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); - if (have_it(dec)) - msg_fb_it_size += IT_SCALING_TABLE_SIZE; - if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], - msg_fb_it_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated message buffers.\n"); - goto error; - } - - if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], - bs_buf_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated bitstream buffers.\n"); - goto error; - } - - si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); - si_vid_clear_buffer(context, &dec->bs_buffers[i]); - } - - dpb_size = calc_dpb_size(dec); - if (dpb_size) { - if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated dpb.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->dpb); - } - - if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) { - unsigned ctx_size = calc_ctx_size_h264_perf(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->ctx); - } - - if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) { - if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, - UVD_SESSION_CONTEXT_SIZE, - PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated session ctx.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->sessionctx); - } - - if (sctx->family >= CHIP_VEGA10) { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; - dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15; - } else { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; - dec->reg.cntl = RUVD_ENGINE_CNTL; - } - - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_CREATE; - dec->msg->stream_handle = dec->stream_handle; - dec->msg->body.create.stream_type = dec->stream_type; - dec->msg->body.create.width_in_samples = dec->base.width; - dec->msg->body.create.height_in_samples = dec->base.height; - dec->msg->body.create.dpb_size = dpb_size; - send_msg_buf(dec); - r = flush(dec, 0); - if (r) - goto error; - - next_buffer(dec); - - return &dec->base; + struct si_context *sctx = (struct si_context *)context; + struct radeon_winsys *ws = sctx->ws; + unsigned dpb_size; + unsigned width = templ->width, height = templ->height; + unsigned bs_buf_size; + struct ruvd_decoder *dec; + int r, i; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + return vl_create_mpeg12_decoder(context, templ); + + /* fall through */ + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + + default: + break; + } + + dec = CALLOC_STRUCT(ruvd_decoder); + + if (!dec) + return NULL; + + if (!sctx->screen->info.is_amdgpu) + dec->use_legacy = true; + + dec->base = *templ; + dec->base.context = context; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = ruvd_destroy; + dec->base.begin_frame = ruvd_begin_frame; + dec->base.decode_macroblock = ruvd_decode_macroblock; + dec->base.decode_bitstream = ruvd_decode_bitstream; + dec->base.end_frame = ruvd_end_frame; + dec->base.flush = ruvd_flush; + + dec->stream_type = profile2stream_type(dec, sctx->family); + dec->set_dtb = set_dtb; + dec->stream_handle = si_vid_alloc_stream_handle(); + dec->screen = context->screen; + dec->ws = ws; + dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); + if (!dec->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + for (i = 0; i < 16; i++) + dec->render_pic_list[i] = NULL; + dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; + bs_buf_size = width * height * (512 / (16 * 16)); + for (i = 0; i < NUM_BUFFERS; ++i) { + unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; + STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); + if (have_it(dec)) + msg_fb_it_size += IT_SCALING_TABLE_SIZE; + if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], msg_fb_it_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated message buffers.\n"); + goto error; + } + + if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); + goto error; + } + + si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); + si_vid_clear_buffer(context, &dec->bs_buffers[i]); + } + + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->dpb); + } + + if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) { + unsigned ctx_size = calc_ctx_size_h264_perf(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->ctx); + } + + if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) { + if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, UVD_SESSION_CONTEXT_SIZE, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated session ctx.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->sessionctx); + } + + if (sctx->family >= CHIP_VEGA10) { + dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; + dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15; + } else { + dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; + dec->reg.cntl = RUVD_ENGINE_CNTL; + } + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_CREATE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->body.create.stream_type = dec->stream_type; + dec->msg->body.create.width_in_samples = dec->base.width; + dec->msg->body.create.height_in_samples = dec->base.height; + dec->msg->body.create.dpb_size = dpb_size; + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + + next_buffer(dec); + + return &dec->base; error: - if (dec->cs) dec->ws->cs_destroy(dec->cs); + if (dec->cs) + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); - return NULL; + return NULL; } /* calculate top/bottom offset */ static unsigned texture_offset(struct radeon_surf *surface, unsigned layer, - enum ruvd_surface_type type) + enum ruvd_surface_type type) { - switch (type) { - default: - case RUVD_SURFACE_TYPE_LEGACY: - return surface->u.legacy.level[0].offset + - layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; - break; - case RUVD_SURFACE_TYPE_GFX9: - return surface->u.gfx9.surf_offset + - layer * surface->u.gfx9.surf_slice_size; - break; - } + switch (type) { + default: + case RUVD_SURFACE_TYPE_LEGACY: + return surface->u.legacy.level[0].offset + + layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; + break; + case RUVD_SURFACE_TYPE_GFX9: + return surface->u.gfx9.surf_offset + layer * surface->u.gfx9.surf_slice_size; + break; + } } /* hw encode the aspect of macro tiles */ static unsigned macro_tile_aspect(unsigned macro_tile_aspect) { - switch (macro_tile_aspect) { - default: - case 1: macro_tile_aspect = 0; break; - case 2: macro_tile_aspect = 1; break; - case 4: macro_tile_aspect = 2; break; - case 8: macro_tile_aspect = 3; break; - } - return macro_tile_aspect; + switch (macro_tile_aspect) { + default: + case 1: + macro_tile_aspect = 0; + break; + case 2: + macro_tile_aspect = 1; + break; + case 4: + macro_tile_aspect = 2; + break; + case 8: + macro_tile_aspect = 3; + break; + } + return macro_tile_aspect; } /* hw encode the bank width and height */ static unsigned bank_wh(unsigned bankwh) { - switch (bankwh) { - default: - case 1: bankwh = 0; break; - case 2: bankwh = 1; break; - case 4: bankwh = 2; break; - case 8: bankwh = 3; break; - } - return bankwh; + switch (bankwh) { + default: + case 1: + bankwh = 0; + break; + case 2: + bankwh = 1; + break; + case 4: + bankwh = 2; + break; + case 8: + bankwh = 3; + break; + } + return bankwh; } /** * fill decoding target field from the luma and chroma surfaces */ void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type) + struct radeon_surf *chroma, enum ruvd_surface_type type) { - switch (type) { - default: - case RUVD_SURFACE_TYPE_LEGACY: - msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; - switch (luma->u.legacy.level[0].mode) { - case RADEON_SURF_MODE_LINEAR_ALIGNED: - msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - break; - case RADEON_SURF_MODE_1D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; - break; - case RADEON_SURF_MODE_2D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; - break; - default: - assert(0); - break; - } - - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); - if (chroma) - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - if (chroma) - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - - if (chroma) { - assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); - assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); - assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); - } - - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); - msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); - break; - case RUVD_SURFACE_TYPE_GFX9: - msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w; - /* SWIZZLE LINEAR MODE */ - msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - msg->body.decode.dt_surf_tile_config = 0; - break; - } + switch (type) { + default: + case RUVD_SURFACE_TYPE_LEGACY: + msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; + switch (luma->u.legacy.level[0].mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + break; + case RADEON_SURF_MODE_1D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; + break; + case RADEON_SURF_MODE_2D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; + break; + default: + assert(0); + break; + } + + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); + if (chroma) + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); + if (chroma) + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } + + if (chroma) { + assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); + assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); + assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); + } + + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); + msg->body.decode.dt_surf_tile_config |= + RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); + break; + case RUVD_SURFACE_TYPE_GFX9: + msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w; + /* SWIZZLE LINEAR MODE */ + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } + msg->body.decode.dt_surf_tile_config = 0; + break; + } } diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h index 583b4d5e47d..c1d22193054 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.h +++ b/src/gallium/drivers/radeon/radeon_uvd.h @@ -32,410 +32,410 @@ #include "vl/vl_video_buffer.h" /* UVD uses PM4 packet type 0 and 2 */ -#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) -#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define RUVD_PKT_TYPE_C 0x3FFFFFFF -#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) -#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define RUVD_PKT_COUNT_C 0xC000FFFF -#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) -#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000 -#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) -#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) +#define RUVD_PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) +#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define RUVD_PKT_TYPE_C 0x3FFFFFFF +#define RUVD_PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) +#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define RUVD_PKT_COUNT_C 0xC000FFFF +#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0) +#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000 +#define RUVD_PKT0(index, count) \ + (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) +#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) /* registers involved with UVD */ -#define RUVD_GPCOM_VCPU_CMD 0xEF0C -#define RUVD_GPCOM_VCPU_DATA0 0xEF10 -#define RUVD_GPCOM_VCPU_DATA1 0xEF14 -#define RUVD_ENGINE_CNTL 0xEF18 +#define RUVD_GPCOM_VCPU_CMD 0xEF0C +#define RUVD_GPCOM_VCPU_DATA0 0xEF10 +#define RUVD_GPCOM_VCPU_DATA1 0xEF14 +#define RUVD_ENGINE_CNTL 0xEF18 -#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c -#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710 -#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714 -#define RUVD_ENGINE_CNTL_SOC15 0x20718 +#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c +#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710 +#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714 +#define RUVD_ENGINE_CNTL_SOC15 0x20718 /* UVD commands to VCPU */ -#define RUVD_CMD_MSG_BUFFER 0x00000000 -#define RUVD_CMD_DPB_BUFFER 0x00000001 -#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002 -#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003 -#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005 -#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100 -#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204 -#define RUVD_CMD_CONTEXT_BUFFER 0x00000206 +#define RUVD_CMD_MSG_BUFFER 0x00000000 +#define RUVD_CMD_DPB_BUFFER 0x00000001 +#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003 +#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100 +#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204 +#define RUVD_CMD_CONTEXT_BUFFER 0x00000206 /* UVD message types */ -#define RUVD_MSG_CREATE 0 -#define RUVD_MSG_DECODE 1 -#define RUVD_MSG_DESTROY 2 +#define RUVD_MSG_CREATE 0 +#define RUVD_MSG_DECODE 1 +#define RUVD_MSG_DESTROY 2 /* UVD stream types */ -#define RUVD_CODEC_H264 0x00000000 -#define RUVD_CODEC_VC1 0x00000001 -#define RUVD_CODEC_MPEG2 0x00000003 -#define RUVD_CODEC_MPEG4 0x00000004 -#define RUVD_CODEC_H264_PERF 0x00000007 -#define RUVD_CODEC_MJPEG 0x00000008 -#define RUVD_CODEC_H265 0x00000010 +#define RUVD_CODEC_H264 0x00000000 +#define RUVD_CODEC_VC1 0x00000001 +#define RUVD_CODEC_MPEG2 0x00000003 +#define RUVD_CODEC_MPEG4 0x00000004 +#define RUVD_CODEC_H264_PERF 0x00000007 +#define RUVD_CODEC_MJPEG 0x00000008 +#define RUVD_CODEC_H265 0x00000010 /* UVD decode target buffer tiling mode */ -#define RUVD_TILE_LINEAR 0x00000000 -#define RUVD_TILE_8X4 0x00000001 -#define RUVD_TILE_8X8 0x00000002 -#define RUVD_TILE_32AS8 0x00000003 +#define RUVD_TILE_LINEAR 0x00000000 +#define RUVD_TILE_8X4 0x00000001 +#define RUVD_TILE_8X8 0x00000002 +#define RUVD_TILE_32AS8 0x00000003 /* UVD decode target buffer array mode */ -#define RUVD_ARRAY_MODE_LINEAR 0x00000000 -#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 -#define RUVD_ARRAY_MODE_1D_THIN 0x00000002 -#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 -#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 -#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 +#define RUVD_ARRAY_MODE_LINEAR 0x00000000 +#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 +#define RUVD_ARRAY_MODE_1D_THIN 0x00000002 +#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 /* UVD tile config */ -#define RUVD_BANK_WIDTH(x) ((x) << 0) -#define RUVD_BANK_HEIGHT(x) ((x) << 3) -#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6) -#define RUVD_NUM_BANKS(x) ((x) << 9) +#define RUVD_BANK_WIDTH(x) ((x) << 0) +#define RUVD_BANK_HEIGHT(x) ((x) << 3) +#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6) +#define RUVD_NUM_BANKS(x) ((x) << 9) /* H.264 profile definitions */ -#define RUVD_H264_PROFILE_BASELINE 0x00000000 -#define RUVD_H264_PROFILE_MAIN 0x00000001 -#define RUVD_H264_PROFILE_HIGH 0x00000002 -#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003 -#define RUVD_H264_PROFILE_MVC 0x00000004 +#define RUVD_H264_PROFILE_BASELINE 0x00000000 +#define RUVD_H264_PROFILE_MAIN 0x00000001 +#define RUVD_H264_PROFILE_HIGH 0x00000002 +#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003 +#define RUVD_H264_PROFILE_MVC 0x00000004 /* VC-1 profile definitions */ -#define RUVD_VC1_PROFILE_SIMPLE 0x00000000 -#define RUVD_VC1_PROFILE_MAIN 0x00000001 -#define RUVD_VC1_PROFILE_ADVANCED 0x00000002 +#define RUVD_VC1_PROFILE_SIMPLE 0x00000000 +#define RUVD_VC1_PROFILE_MAIN 0x00000001 +#define RUVD_VC1_PROFILE_ADVANCED 0x00000002 -enum ruvd_surface_type { - RUVD_SURFACE_TYPE_LEGACY = 0, - RUVD_SURFACE_TYPE_GFX9 +enum ruvd_surface_type +{ + RUVD_SURFACE_TYPE_LEGACY = 0, + RUVD_SURFACE_TYPE_GFX9 }; struct ruvd_mvc_element { - uint16_t viewOrderIndex; - uint16_t viewId; - uint16_t numOfAnchorRefsInL0; - uint16_t viewIdOfAnchorRefsInL0[15]; - uint16_t numOfAnchorRefsInL1; - uint16_t viewIdOfAnchorRefsInL1[15]; - uint16_t numOfNonAnchorRefsInL0; - uint16_t viewIdOfNonAnchorRefsInL0[15]; - uint16_t numOfNonAnchorRefsInL1; - uint16_t viewIdOfNonAnchorRefsInL1[15]; + uint16_t viewOrderIndex; + uint16_t viewId; + uint16_t numOfAnchorRefsInL0; + uint16_t viewIdOfAnchorRefsInL0[15]; + uint16_t numOfAnchorRefsInL1; + uint16_t viewIdOfAnchorRefsInL1[15]; + uint16_t numOfNonAnchorRefsInL0; + uint16_t viewIdOfNonAnchorRefsInL0[15]; + uint16_t numOfNonAnchorRefsInL1; + uint16_t viewIdOfNonAnchorRefsInL1[15]; }; struct ruvd_h264 { - uint32_t profile; - uint32_t level; + uint32_t profile; + uint32_t level; - uint32_t sps_info_flags; - uint32_t pps_info_flags; - uint8_t chroma_format; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_frame_num_minus4; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_frame_num_minus4; - uint8_t pic_order_cnt_type; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - uint8_t num_ref_frames; - uint8_t reserved_8bit; + uint8_t pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t num_ref_frames; + uint8_t reserved_8bit; - int8_t pic_init_qp_minus26; - int8_t pic_init_qs_minus26; - int8_t chroma_qp_index_offset; - int8_t second_chroma_qp_index_offset; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; - uint8_t num_slice_groups_minus1; - uint8_t slice_group_map_type; - uint8_t num_ref_idx_l0_active_minus1; - uint8_t num_ref_idx_l1_active_minus1; + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; - uint16_t slice_group_change_rate_minus1; - uint16_t reserved_16bit_1; + uint16_t slice_group_change_rate_minus1; + uint16_t reserved_16bit_1; - uint8_t scaling_list_4x4[6][16]; - uint8_t scaling_list_8x8[2][64]; + uint8_t scaling_list_4x4[6][16]; + uint8_t scaling_list_8x8[2][64]; - uint32_t frame_num; - uint32_t frame_num_list[16]; - int32_t curr_field_order_cnt_list[2]; - int32_t field_order_cnt_list[16][2]; + uint32_t frame_num; + uint32_t frame_num_list[16]; + int32_t curr_field_order_cnt_list[2]; + int32_t field_order_cnt_list[16][2]; - uint32_t decoded_pic_idx; + uint32_t decoded_pic_idx; - uint32_t curr_pic_ref_frame_num; + uint32_t curr_pic_ref_frame_num; - uint8_t ref_frame_list[16]; + uint8_t ref_frame_list[16]; - uint32_t reserved[122]; + uint32_t reserved[122]; - struct { - uint32_t numViews; - uint32_t viewId0; - struct ruvd_mvc_element mvcElements[1]; - } mvc; + struct { + uint32_t numViews; + uint32_t viewId0; + struct ruvd_mvc_element mvcElements[1]; + } mvc; }; struct ruvd_h265 { - uint32_t sps_info_flags; - uint32_t pps_info_flags; - - uint8_t chroma_format; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - - uint8_t sps_max_dec_pic_buffering_minus1; - uint8_t log2_min_luma_coding_block_size_minus3; - uint8_t log2_diff_max_min_luma_coding_block_size; - uint8_t log2_min_transform_block_size_minus2; - - uint8_t log2_diff_max_min_transform_block_size; - uint8_t max_transform_hierarchy_depth_inter; - uint8_t max_transform_hierarchy_depth_intra; - uint8_t pcm_sample_bit_depth_luma_minus1; - - uint8_t pcm_sample_bit_depth_chroma_minus1; - uint8_t log2_min_pcm_luma_coding_block_size_minus3; - uint8_t log2_diff_max_min_pcm_luma_coding_block_size; - uint8_t num_extra_slice_header_bits; - - uint8_t num_short_term_ref_pic_sets; - uint8_t num_long_term_ref_pic_sps; - uint8_t num_ref_idx_l0_default_active_minus1; - uint8_t num_ref_idx_l1_default_active_minus1; - - int8_t pps_cb_qp_offset; - int8_t pps_cr_qp_offset; - int8_t pps_beta_offset_div2; - int8_t pps_tc_offset_div2; - - uint8_t diff_cu_qp_delta_depth; - uint8_t num_tile_columns_minus1; - uint8_t num_tile_rows_minus1; - uint8_t log2_parallel_merge_level_minus2; - - uint16_t column_width_minus1[19]; - uint16_t row_height_minus1[21]; - - int8_t init_qp_minus26; - uint8_t num_delta_pocs_ref_rps_idx; - uint8_t curr_idx; - uint8_t reserved1; - int32_t curr_poc; - uint8_t ref_pic_list[16]; - int32_t poc_list[16]; - uint8_t ref_pic_set_st_curr_before[8]; - uint8_t ref_pic_set_st_curr_after[8]; - uint8_t ref_pic_set_lt_curr[8]; - - uint8_t ucScalingListDCCoefSizeID2[6]; - uint8_t ucScalingListDCCoefSizeID3[2]; - - uint8_t highestTid; - uint8_t isNonRef; - - uint8_t p010_mode; - uint8_t msb_mode; - uint8_t luma_10to8; - uint8_t chroma_10to8; - uint8_t sclr_luma10to8; - uint8_t sclr_chroma10to8; - - uint8_t direct_reflist[2][15]; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + + uint8_t sps_max_dec_pic_buffering_minus1; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_transform_block_size_minus2; + + uint8_t log2_diff_max_min_transform_block_size; + uint8_t max_transform_hierarchy_depth_inter; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t pcm_sample_bit_depth_luma_minus1; + + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint8_t num_extra_slice_header_bits; + + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pic_sps; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + + uint8_t diff_cu_qp_delta_depth; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + uint8_t log2_parallel_merge_level_minus2; + + uint16_t column_width_minus1[19]; + uint16_t row_height_minus1[21]; + + int8_t init_qp_minus26; + uint8_t num_delta_pocs_ref_rps_idx; + uint8_t curr_idx; + uint8_t reserved1; + int32_t curr_poc; + uint8_t ref_pic_list[16]; + int32_t poc_list[16]; + uint8_t ref_pic_set_st_curr_before[8]; + uint8_t ref_pic_set_st_curr_after[8]; + uint8_t ref_pic_set_lt_curr[8]; + + uint8_t ucScalingListDCCoefSizeID2[6]; + uint8_t ucScalingListDCCoefSizeID3[2]; + + uint8_t highestTid; + uint8_t isNonRef; + + uint8_t p010_mode; + uint8_t msb_mode; + uint8_t luma_10to8; + uint8_t chroma_10to8; + uint8_t sclr_luma10to8; + uint8_t sclr_chroma10to8; + + uint8_t direct_reflist[2][15]; }; struct ruvd_vc1 { - uint32_t profile; - uint32_t level; - uint32_t sps_info_flags; - uint32_t pps_info_flags; - uint32_t pic_structure; - uint32_t chroma_format; + uint32_t profile; + uint32_t level; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint32_t pic_structure; + uint32_t chroma_format; }; struct ruvd_mpeg2 { - uint32_t decoded_pic_idx; - uint32_t ref_pic_idx[2]; - - uint8_t load_intra_quantiser_matrix; - uint8_t load_nonintra_quantiser_matrix; - uint8_t reserved_quantiser_alignement[2]; - uint8_t intra_quantiser_matrix[64]; - uint8_t nonintra_quantiser_matrix[64]; - - uint8_t profile_and_level_indication; - uint8_t chroma_format; - - uint8_t picture_coding_type; - - uint8_t reserved_1; - - uint8_t f_code[2][2]; - uint8_t intra_dc_precision; - uint8_t pic_structure; - uint8_t top_field_first; - uint8_t frame_pred_frame_dct; - uint8_t concealment_motion_vectors; - uint8_t q_scale_type; - uint8_t intra_vlc_format; - uint8_t alternate_scan; + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; + + uint8_t load_intra_quantiser_matrix; + uint8_t load_nonintra_quantiser_matrix; + uint8_t reserved_quantiser_alignement[2]; + uint8_t intra_quantiser_matrix[64]; + uint8_t nonintra_quantiser_matrix[64]; + + uint8_t profile_and_level_indication; + uint8_t chroma_format; + + uint8_t picture_coding_type; + + uint8_t reserved_1; + + uint8_t f_code[2][2]; + uint8_t intra_dc_precision; + uint8_t pic_structure; + uint8_t top_field_first; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t q_scale_type; + uint8_t intra_vlc_format; + uint8_t alternate_scan; }; -struct ruvd_mpeg4 -{ - uint32_t decoded_pic_idx; - uint32_t ref_pic_idx[2]; +struct ruvd_mpeg4 { + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; - uint32_t variant_type; - uint8_t profile_and_level_indication; + uint32_t variant_type; + uint8_t profile_and_level_indication; - uint8_t video_object_layer_verid; - uint8_t video_object_layer_shape; + uint8_t video_object_layer_verid; + uint8_t video_object_layer_shape; - uint8_t reserved_1; + uint8_t reserved_1; - uint16_t video_object_layer_width; - uint16_t video_object_layer_height; + uint16_t video_object_layer_width; + uint16_t video_object_layer_height; - uint16_t vop_time_increment_resolution; + uint16_t vop_time_increment_resolution; - uint16_t reserved_2; + uint16_t reserved_2; - uint32_t flags; + uint32_t flags; - uint8_t quant_type; + uint8_t quant_type; - uint8_t reserved_3[3]; + uint8_t reserved_3[3]; - uint8_t intra_quant_mat[64]; - uint8_t nonintra_quant_mat[64]; + uint8_t intra_quant_mat[64]; + uint8_t nonintra_quant_mat[64]; - struct { - uint8_t sprite_enable; + struct { + uint8_t sprite_enable; - uint8_t reserved_4[3]; + uint8_t reserved_4[3]; - uint16_t sprite_width; - uint16_t sprite_height; - int16_t sprite_left_coordinate; - int16_t sprite_top_coordinate; + uint16_t sprite_width; + uint16_t sprite_height; + int16_t sprite_left_coordinate; + int16_t sprite_top_coordinate; - uint8_t no_of_sprite_warping_points; - uint8_t sprite_warping_accuracy; - uint8_t sprite_brightness_change; - uint8_t low_latency_sprite_enable; - } sprite_config; + uint8_t no_of_sprite_warping_points; + uint8_t sprite_warping_accuracy; + uint8_t sprite_brightness_change; + uint8_t low_latency_sprite_enable; + } sprite_config; - struct { - uint32_t flags; - uint8_t vol_mode; - uint8_t reserved_5[3]; - } divx_311_config; + struct { + uint32_t flags; + uint8_t vol_mode; + uint8_t reserved_5[3]; + } divx_311_config; }; /* message between driver and hardware */ struct ruvd_msg { - uint32_t size; - uint32_t msg_type; - uint32_t stream_handle; - uint32_t status_report_feedback_number; - - union { - struct { - uint32_t stream_type; - uint32_t session_flags; - uint32_t asic_id; - uint32_t width_in_samples; - uint32_t height_in_samples; - uint32_t dpb_buffer; - uint32_t dpb_size; - uint32_t dpb_model; - uint32_t version_info; - } create; - - struct { - uint32_t stream_type; - uint32_t decode_flags; - uint32_t width_in_samples; - uint32_t height_in_samples; - - uint32_t dpb_buffer; - uint32_t dpb_size; - uint32_t dpb_model; - uint32_t dpb_reserved; - - uint32_t db_offset_alignment; - uint32_t db_pitch; - uint32_t db_tiling_mode; - uint32_t db_array_mode; - uint32_t db_field_mode; - uint32_t db_surf_tile_config; - uint32_t db_aligned_height; - uint32_t db_reserved; - - uint32_t use_addr_macro; - - uint32_t bsd_buffer; - uint32_t bsd_size; - - uint32_t pic_param_buffer; - uint32_t pic_param_size; - uint32_t mb_cntl_buffer; - uint32_t mb_cntl_size; - - uint32_t dt_buffer; - uint32_t dt_pitch; - uint32_t dt_tiling_mode; - uint32_t dt_array_mode; - uint32_t dt_field_mode; - uint32_t dt_luma_top_offset; - uint32_t dt_luma_bottom_offset; - uint32_t dt_chroma_top_offset; - uint32_t dt_chroma_bottom_offset; - uint32_t dt_surf_tile_config; - uint32_t dt_uv_surf_tile_config; - // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney - uint32_t dt_wa_chroma_top_offset; - uint32_t dt_wa_chroma_bottom_offset; - - uint32_t reserved[16]; - - union { - struct ruvd_h264 h264; - struct ruvd_h265 h265; - struct ruvd_vc1 vc1; - struct ruvd_mpeg2 mpeg2; - struct ruvd_mpeg4 mpeg4; - - uint32_t info[768]; - } codec; - - uint8_t extension_support; - uint8_t reserved_8bit_1; - uint8_t reserved_8bit_2; - uint8_t reserved_8bit_3; - uint32_t extension_reserved[64]; - } decode; - } body; + uint32_t size; + uint32_t msg_type; + uint32_t stream_handle; + uint32_t status_report_feedback_number; + + union { + struct { + uint32_t stream_type; + uint32_t session_flags; + uint32_t asic_id; + uint32_t width_in_samples; + uint32_t height_in_samples; + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t version_info; + } create; + + struct { + uint32_t stream_type; + uint32_t decode_flags; + uint32_t width_in_samples; + uint32_t height_in_samples; + + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t dpb_reserved; + + uint32_t db_offset_alignment; + uint32_t db_pitch; + uint32_t db_tiling_mode; + uint32_t db_array_mode; + uint32_t db_field_mode; + uint32_t db_surf_tile_config; + uint32_t db_aligned_height; + uint32_t db_reserved; + + uint32_t use_addr_macro; + + uint32_t bsd_buffer; + uint32_t bsd_size; + + uint32_t pic_param_buffer; + uint32_t pic_param_size; + uint32_t mb_cntl_buffer; + uint32_t mb_cntl_size; + + uint32_t dt_buffer; + uint32_t dt_pitch; + uint32_t dt_tiling_mode; + uint32_t dt_array_mode; + uint32_t dt_field_mode; + uint32_t dt_luma_top_offset; + uint32_t dt_luma_bottom_offset; + uint32_t dt_chroma_top_offset; + uint32_t dt_chroma_bottom_offset; + uint32_t dt_surf_tile_config; + uint32_t dt_uv_surf_tile_config; + // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney + uint32_t dt_wa_chroma_top_offset; + uint32_t dt_wa_chroma_bottom_offset; + + uint32_t reserved[16]; + + union { + struct ruvd_h264 h264; + struct ruvd_h265 h265; + struct ruvd_vc1 vc1; + struct ruvd_mpeg2 mpeg2; + struct ruvd_mpeg4 mpeg4; + + uint32_t info[768]; + } codec; + + uint8_t extension_support; + uint8_t reserved_8bit_1; + uint8_t reserved_8bit_2; + uint8_t reserved_8bit_3; + uint32_t extension_reserved[64]; + } decode; + } body; }; /* driver dependent callback */ -typedef struct pb_buffer* (*ruvd_set_dtb) -(struct ruvd_msg* msg, struct vl_video_buffer *vb); +typedef struct pb_buffer *(*ruvd_set_dtb)(struct ruvd_msg *msg, struct vl_video_buffer *vb); /* create an UVD decode */ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templat, - ruvd_set_dtb set_dtb); + const struct pipe_video_codec *templat, + ruvd_set_dtb set_dtb); /* fill decoding target field from the luma and chroma surfaces */ void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type); + struct radeon_surf *chroma, enum ruvd_surface_type type); #endif diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c index b2b742aab9b..4ac4515819d 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -25,109 +25,90 @@ * **************************************************************************/ -#include <stdio.h> +#include "radeon_uvd_enc.h" #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_uvd_enc.h" +#include <stdio.h> -#define UVD_HEVC_LEVEL_1 30 -#define UVD_HEVC_LEVEL_2 60 -#define UVD_HEVC_LEVEL_2_1 63 -#define UVD_HEVC_LEVEL_3 90 -#define UVD_HEVC_LEVEL_3_1 93 -#define UVD_HEVC_LEVEL_4 120 -#define UVD_HEVC_LEVEL_4_1 123 -#define UVD_HEVC_LEVEL_5 150 -#define UVD_HEVC_LEVEL_5_1 153 -#define UVD_HEVC_LEVEL_5_2 156 -#define UVD_HEVC_LEVEL_6 180 -#define UVD_HEVC_LEVEL_6_1 183 -#define UVD_HEVC_LEVEL_6_2 186 - -static void -radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, - struct pipe_h265_enc_picture_desc *pic) +#define UVD_HEVC_LEVEL_1 30 +#define UVD_HEVC_LEVEL_2 60 +#define UVD_HEVC_LEVEL_2_1 63 +#define UVD_HEVC_LEVEL_3 90 +#define UVD_HEVC_LEVEL_3_1 93 +#define UVD_HEVC_LEVEL_4 120 +#define UVD_HEVC_LEVEL_4_1 123 +#define UVD_HEVC_LEVEL_5 150 +#define UVD_HEVC_LEVEL_5_1 153 +#define UVD_HEVC_LEVEL_5_2 156 +#define UVD_HEVC_LEVEL_6 180 +#define UVD_HEVC_LEVEL_6_1 183 +#define UVD_HEVC_LEVEL_6_2 186 + +static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, + struct pipe_h265_enc_picture_desc *pic) { enc->enc_pic.picture_type = pic->picture_type; enc->enc_pic.frame_num = pic->frame_num; enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type; enc->enc_pic.not_referenced = pic->not_referenced; - enc->enc_pic.is_iframe = - (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR) - || (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I); + enc->enc_pic.is_iframe = (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_IDR) || + (pic->picture_type == PIPE_H265_ENC_PICTURE_TYPE_I); enc->enc_pic.crop_left = 0; - enc->enc_pic.crop_right = - (align(enc->base.width, 16) - enc->base.width) / 2; + enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; enc->enc_pic.crop_top = 0; - enc->enc_pic.crop_bottom = - (align(enc->base.height, 16) - enc->base.height) / 2; + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = - MAX2(16, util_next_power_of_two(pic->seq.intra_period)); + enc->enc_pic.max_poc = MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; - enc->enc_pic.pic_width_in_luma_samples = - pic->seq.pic_width_in_luma_samples; - enc->enc_pic.pic_height_in_luma_samples = - pic->seq.pic_height_in_luma_samples; + enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples; + enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples; enc->enc_pic.log2_diff_max_min_luma_coding_block_size = pic->seq.log2_diff_max_min_luma_coding_block_size; enc->enc_pic.log2_min_transform_block_size_minus2 = pic->seq.log2_min_transform_block_size_minus2; enc->enc_pic.log2_diff_max_min_transform_block_size = pic->seq.log2_diff_max_min_transform_block_size; - enc->enc_pic.max_transform_hierarchy_depth_inter = - pic->seq.max_transform_hierarchy_depth_inter; - enc->enc_pic.max_transform_hierarchy_depth_intra = - pic->seq.max_transform_hierarchy_depth_intra; - enc->enc_pic.log2_parallel_merge_level_minus2 = - pic->pic.log2_parallel_merge_level_minus2; + enc->enc_pic.max_transform_hierarchy_depth_inter = pic->seq.max_transform_hierarchy_depth_inter; + enc->enc_pic.max_transform_hierarchy_depth_intra = pic->seq.max_transform_hierarchy_depth_intra; + enc->enc_pic.log2_parallel_merge_level_minus2 = pic->pic.log2_parallel_merge_level_minus2; enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8; enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8; enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type; enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand; - enc->enc_pic.sample_adaptive_offset_enabled_flag = - pic->seq.sample_adaptive_offset_enabled_flag; - enc->enc_pic.pcm_enabled_flag = 0; /*HW not support PCM */ - enc->enc_pic.sps_temporal_mvp_enabled_flag = - pic->seq.sps_temporal_mvp_enabled_flag; + enc->enc_pic.sample_adaptive_offset_enabled_flag = pic->seq.sample_adaptive_offset_enabled_flag; + enc->enc_pic.pcm_enabled_flag = 0; /*HW not support PCM */ + enc->enc_pic.sps_temporal_mvp_enabled_flag = pic->seq.sps_temporal_mvp_enabled_flag; } -static void -flush(struct radeon_uvd_encoder *enc) +static void flush(struct radeon_uvd_encoder *enc) { enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); } -static void -radeon_uvd_enc_flush(struct pipe_video_codec *encoder) +static void radeon_uvd_enc_flush(struct pipe_video_codec *encoder) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; flush(enc); } -static void -radeon_uvd_enc_cs_flush(void *ctx, unsigned flags, - struct pipe_fence_handle **fence) +static void radeon_uvd_enc_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { // just ignored } -static unsigned -get_cpb_num(struct radeon_uvd_encoder *enc) +static unsigned get_cpb_num(struct radeon_uvd_encoder *enc) { unsigned w = align(enc->base.width, 16) / 16; unsigned h = align(enc->base.height, 16) / 16; @@ -176,16 +157,14 @@ get_cpb_num(struct radeon_uvd_encoder *enc) return MIN2(dpb / (w * h), 16); } -static void -radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; - struct vl_video_buffer *vid_buf = (struct vl_video_buffer *) source; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; + struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; - radeon_uvd_enc_get_param(enc, - (struct pipe_h265_enc_picture_desc *) picture); + radeon_uvd_enc_get_param(enc, (struct pipe_h265_enc_picture_desc *)picture); enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); @@ -196,8 +175,7 @@ radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder, struct rvid_buffer fb; enc->stream_handle = si_vid_alloc_stream_handle(); enc->si = CALLOC_STRUCT(rvid_buffer); - si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, - PIPE_USAGE_STAGING); + si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, PIPE_USAGE_STAGING); si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING); enc->fb = &fb; enc->begin(enc, picture); @@ -206,12 +184,11 @@ radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder, } } -static void -radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_resource *destination, void **fb) +static void radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_resource *destination, void **fb) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; enc->get_buffer(destination, &enc->bs_handle, NULL); enc->bs_size = destination->width0; @@ -226,19 +203,17 @@ radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, enc->encode(enc); } -static void -radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; flush(enc); } -static void -radeon_uvd_enc_destroy(struct pipe_video_codec *encoder) +static void radeon_uvd_enc_destroy(struct pipe_video_codec *encoder) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; if (enc->stream_handle) { struct rvid_buffer fb; @@ -255,18 +230,15 @@ radeon_uvd_enc_destroy(struct pipe_video_codec *encoder) FREE(enc); } -static void -radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, - void *feedback, unsigned *size) +static void radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, void *feedback, + unsigned *size) { - struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *) encoder; + struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; struct rvid_buffer *fb = feedback; if (NULL != size) { - radeon_uvd_enc_feedback_t *fb_data = - (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map( - fb->res->buf, enc->cs, - PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); + radeon_uvd_enc_feedback_t *fb_data = (radeon_uvd_enc_feedback_t *)enc->ws->buffer_map( + fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (!fb_data->status) *size = fb_data->bitstream_size; @@ -279,16 +251,15 @@ radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, FREE(fb); } -struct pipe_video_codec * -radeon_uvd_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - struct radeon_winsys *ws, - radeon_uvd_enc_get_buffer get_buffer) +struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context *context, + const struct pipe_video_codec *templ, + struct radeon_winsys *ws, + radeon_uvd_enc_get_buffer get_buffer) { - struct si_screen *sscreen = (struct si_screen *) context->screen; - struct si_context *sctx = (struct si_context *) context; + struct si_screen *sscreen = (struct si_screen *)context->screen; + struct si_context *sctx = (struct si_context *)context; struct radeon_uvd_encoder *enc; - struct pipe_video_buffer *tmp_buf, templat = { }; + struct pipe_video_buffer *tmp_buf, templat = {}; struct radeon_surf *tmp_surf; unsigned cpb_size; @@ -314,8 +285,7 @@ radeon_uvd_create_encoder(struct pipe_context *context, enc->bits_in_shifter = 0; enc->screen = context->screen; enc->ws = ws; - enc->cs = - ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false); + enc->cs = ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); @@ -341,21 +311,19 @@ radeon_uvd_create_encoder(struct pipe_context *context, if (!enc->cpb_num) goto error; - get_buffer(((struct vl_video_buffer *) tmp_buf)->resources[0], NULL, - &tmp_surf); + get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - cpb_size = (sscreen->info.chip_class < GFX9) ? - align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * - align(tmp_surf->u.legacy.level[0].nblk_y, 32) : - align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * - align(tmp_surf->u.gfx9.surf_height, 32); + cpb_size = (sscreen->info.chip_class < GFX9) + ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32) + : align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * + align(tmp_surf->u.gfx9.surf_height, 32); cpb_size = cpb_size * 3 / 2; cpb_size = cpb_size * enc->cpb_num; tmp_buf->destroy(tmp_buf); - if (!si_vid_create_buffer - (enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { + if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { RVID_ERR("Can't create CPB buffer.\n"); goto error; } @@ -364,7 +332,7 @@ radeon_uvd_create_encoder(struct pipe_context *context, return &enc->base; - error: +error: if (enc->cs) enc->ws->cs_destroy(enc->cs); @@ -374,8 +342,7 @@ radeon_uvd_create_encoder(struct pipe_context *context, return NULL; } -bool -si_radeon_uvd_enc_supported(struct si_screen * sscreen) +bool si_radeon_uvd_enc_supported(struct si_screen *sscreen) { return (sscreen->info.uvd_enc_supported); } diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.h b/src/gallium/drivers/radeon/radeon_uvd_enc.h index a0f18b68742..c121dc60dc7 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.h +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.h @@ -30,106 +30,105 @@ #include "radeon_video.h" -#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1 -#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1 - -#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001 -#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002 -#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003 -#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004 -#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005 -#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006 -#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007 -#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008 -#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009 -#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a -#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b -#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c -#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d -#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e -#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f -#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010 -#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011 -#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012 -#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013 -#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014 - -#define RENC_UVD_IB_OP_INITIALIZE 0x08000001 -#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002 -#define RENC_UVD_IB_OP_ENCODE 0x08000003 -#define RENC_UVD_IB_OP_INIT_RC 0x08000004 -#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005 -#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006 -#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007 -#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008 - -#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000 -#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16 -#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF -#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0 - -#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000 -#define RENC_UVD_PREENCODE_MODE_1X 0x00000001 -#define RENC_UVD_PREENCODE_MODE_2X 0x00000002 -#define RENC_UVD_PREENCODE_MODE_4X 0x00000004 - -#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000 -#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 - -#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000 -#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001 -#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002 -#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003 - -#define RENC_UVD_NALU_TYPE_AUD 0x00000001 -#define RENC_UVD_NALU_TYPE_VPS 0x00000002 -#define RENC_UVD_NALU_TYPE_SPS 0x00000003 -#define RENC_UVD_NALU_TYPE_PPS 0x00000004 -#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005 - -#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16 -#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16 - -#define RENC_UVD_HEADER_INSTRUCTION_END 0 -#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1 -#define RENC_UVD_HEADER_INSTRUCTION_COPY 2 -#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3 -#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4 -#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5 - -#define RENC_UVD_PICTURE_TYPE_B 0 -#define RENC_UVD_PICTURE_TYPE_P 1 -#define RENC_UVD_PICTURE_TYPE_I 2 -#define RENC_UVD_PICTURE_TYPE_P_SKIP 3 - -#define RENC_UVD_SWIZZLE_MODE_LINEAR 0 -#define RENC_UVD_SWIZZLE_MODE_256B_D 2 -#define RENC_UVD_SWIZZLE_MODE_4kB_D 6 -#define RENC_UVD_SWIZZLE_MODE_64kB_D 10 -#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0 -#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1 -#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2 - -#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34 -#define RENC_UVD_ADDR_MODE_LINEAR 0 -#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1 -#define RENC_UVD_ADDR_MODE_32AS8_88 2 - -#define RENC_UVD_ARRAY_MODE_LINEAR 0 -#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2 -#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4 - -#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0 -#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1 - -#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0 -#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1 - -#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000 -#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001 - -typedef struct radeon_uvd_enc_feedback_s -{ +#define RENC_UVD_FW_INTERFACE_MAJOR_VERSION 1 +#define RENC_UVD_FW_INTERFACE_MINOR_VERSION 1 + +#define RENC_UVD_IB_PARAM_SESSION_INFO 0x00000001 +#define RENC_UVD_IB_PARAM_TASK_INFO 0x00000002 +#define RENC_UVD_IB_PARAM_SESSION_INIT 0x00000003 +#define RENC_UVD_IB_PARAM_LAYER_CONTROL 0x00000004 +#define RENC_UVD_IB_PARAM_LAYER_SELECT 0x00000005 +#define RENC_UVD_IB_PARAM_SLICE_CONTROL 0x00000006 +#define RENC_UVD_IB_PARAM_SPEC_MISC 0x00000007 +#define RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000008 +#define RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000009 +#define RENC_UVD_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000000a +#define RENC_UVD_IB_PARAM_SLICE_HEADER 0x0000000b +#define RENC_UVD_IB_PARAM_ENCODE_PARAMS 0x0000000c +#define RENC_UVD_IB_PARAM_QUALITY_PARAMS 0x0000000d +#define RENC_UVD_IB_PARAM_DEBLOCKING_FILTER 0x0000000e +#define RENC_UVD_IB_PARAM_INTRA_REFRESH 0x0000000f +#define RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000010 +#define RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000011 +#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER 0x00000012 +#define RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER 0x00000013 +#define RENC_UVD_IB_PARAM_FEEDBACK_BUFFER_ADDITIONAL 0x00000014 + +#define RENC_UVD_IB_OP_INITIALIZE 0x08000001 +#define RENC_UVD_IB_OP_CLOSE_SESSION 0x08000002 +#define RENC_UVD_IB_OP_ENCODE 0x08000003 +#define RENC_UVD_IB_OP_INIT_RC 0x08000004 +#define RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x08000005 +#define RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE 0x08000006 +#define RENC_UVD_IB_OP_SET_BALANCE_ENCODING_MODE 0x08000007 +#define RENC_UVD_IB_OP_SET_QUALITY_ENCODING_MODE 0x08000008 + +#define RENC_UVD_IF_MAJOR_VERSION_MASK 0xFFFF0000 +#define RENC_UVD_IF_MAJOR_VERSION_SHIFT 16 +#define RENC_UVD_IF_MINOR_VERSION_MASK 0x0000FFFF +#define RENC_UVD_IF_MINOR_VERSION_SHIFT 0 + +#define RENC_UVD_PREENCODE_MODE_NONE 0x00000000 +#define RENC_UVD_PREENCODE_MODE_1X 0x00000001 +#define RENC_UVD_PREENCODE_MODE_2X 0x00000002 +#define RENC_UVD_PREENCODE_MODE_4X 0x00000004 + +#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000 +#define RENC_UVD_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 + +#define RENC_UVD_RATE_CONTROL_METHOD_NONE 0x00000000 +#define RENC_UVD_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001 +#define RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002 +#define RENC_UVD_RATE_CONTROL_METHOD_CBR 0x00000003 + +#define RENC_UVD_NALU_TYPE_AUD 0x00000001 +#define RENC_UVD_NALU_TYPE_VPS 0x00000002 +#define RENC_UVD_NALU_TYPE_SPS 0x00000003 +#define RENC_UVD_NALU_TYPE_PPS 0x00000004 +#define RENC_UVD_NALU_TYPE_END_OF_SEQUENCE 0x00000005 + +#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16 +#define RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16 + +#define RENC_UVD_HEADER_INSTRUCTION_END 0 +#define RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 1 +#define RENC_UVD_HEADER_INSTRUCTION_COPY 2 +#define RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE 3 +#define RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT 4 +#define RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA 5 + +#define RENC_UVD_PICTURE_TYPE_B 0 +#define RENC_UVD_PICTURE_TYPE_P 1 +#define RENC_UVD_PICTURE_TYPE_I 2 +#define RENC_UVD_PICTURE_TYPE_P_SKIP 3 + +#define RENC_UVD_SWIZZLE_MODE_LINEAR 0 +#define RENC_UVD_SWIZZLE_MODE_256B_D 2 +#define RENC_UVD_SWIZZLE_MODE_4kB_D 6 +#define RENC_UVD_SWIZZLE_MODE_64kB_D 10 +#define RENC_UVD_INTRA_REFRESH_MODE_NONE 0 +#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_ROWS 1 +#define RENC_UVD_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2 + +#define RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES 34 +#define RENC_UVD_ADDR_MODE_LINEAR 0 +#define RENC_UVD_ADDR_MODE_PELE_8X8_1D 1 +#define RENC_UVD_ADDR_MODE_32AS8_88 2 + +#define RENC_UVD_ARRAY_MODE_LINEAR 0 +#define RENC_UVD_ARRAY_MODE_PELE_8X8_1D 2 +#define RENC_UVD_ARRAY_MODE_2D_TILED_THIN1 4 + +#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0 +#define RENC_UVD_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1 + +#define RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR 0 +#define RENC_UVD_FEEDBACK_BUFFER_MODE_CIRCULAR 1 + +#define RENC_UVD_FEEDBACK_STATUS_OK 0x00000000 +#define RENC_UVD_FEEDBACK_STATUS_NOT_ENCODED 0x10000001 + +typedef struct radeon_uvd_enc_feedback_s { uint32_t task_id; uint32_t first_in_task; uint32_t last_in_task; @@ -142,23 +141,20 @@ typedef struct radeon_uvd_enc_feedback_s uint32_t extra_bytes; } radeon_uvd_enc_feedback_t; -typedef struct ruvd_enc_session_info_s -{ +typedef struct ruvd_enc_session_info_s { uint32_t reserved; uint32_t interface_version; uint32_t sw_context_address_hi; uint32_t sw_context_address_lo; } ruvd_enc_session_info_t; -typedef struct ruvd_enc_task_info_s -{ +typedef struct ruvd_enc_task_info_s { uint32_t total_size_of_all_packages; uint32_t task_id; uint32_t allowed_max_num_feedbacks; } ruvd_enc_task_info_t; -typedef struct ruvd_enc_session_init_s -{ +typedef struct ruvd_enc_session_init_s { uint32_t aligned_picture_width; uint32_t aligned_picture_height; uint32_t padding_width; @@ -167,38 +163,31 @@ typedef struct ruvd_enc_session_init_s uint32_t pre_encode_chroma_enabled; } ruvd_enc_session_init_t; -typedef struct ruvd_enc_layer_control_s -{ +typedef struct ruvd_enc_layer_control_s { uint32_t max_num_temporal_layers; uint32_t num_temporal_layers; } ruvd_enc_layer_control_t; -typedef struct ruvd_enc_layer_select_s -{ +typedef struct ruvd_enc_layer_select_s { uint32_t temporal_layer_index; } ruvd_enc_layer_select_t; -typedef struct ruvd_enc_hevc_slice_control_s -{ +typedef struct ruvd_enc_hevc_slice_control_s { uint32_t slice_control_mode; - union - { - struct - { + union { + struct { uint32_t num_ctbs_per_slice; uint32_t num_ctbs_per_slice_segment; } fixed_ctbs_per_slice; - struct - { + struct { uint32_t num_bits_per_slice; uint32_t num_bits_per_slice_segment; } fixed_bits_per_slice; }; } ruvd_enc_hevc_slice_control_t; -typedef struct ruvd_enc_hevc_spec_misc_s -{ +typedef struct ruvd_enc_hevc_spec_misc_s { uint32_t log2_min_luma_coding_block_size_minus3; uint32_t amp_disabled; uint32_t strong_intra_smoothing_enabled; @@ -208,14 +197,12 @@ typedef struct ruvd_enc_hevc_spec_misc_s uint32_t quarter_pel_enabled; } ruvd_enc_hevc_spec_misc_t; -typedef struct ruvd_enc_rate_ctl_session_init_s -{ +typedef struct ruvd_enc_rate_ctl_session_init_s { uint32_t rate_control_method; uint32_t vbv_buffer_level; } ruvd_enc_rate_ctl_session_init_t; -typedef struct ruvd_enc_rate_ctl_layer_init_s -{ +typedef struct ruvd_enc_rate_ctl_layer_init_s { uint32_t target_bit_rate; uint32_t peak_bit_rate; uint32_t frame_rate_num; @@ -226,8 +213,7 @@ typedef struct ruvd_enc_rate_ctl_layer_init_s uint32_t peak_bits_per_picture_fractional; } ruvd_enc_rate_ctl_layer_init_t; -typedef struct ruvd_enc_rate_ctl_per_picture_s -{ +typedef struct ruvd_enc_rate_ctl_per_picture_s { uint32_t qp; uint32_t min_qp_app; uint32_t max_qp_app; @@ -237,34 +223,27 @@ typedef struct ruvd_enc_rate_ctl_per_picture_s uint32_t enforce_hrd; } ruvd_enc_rate_ctl_per_picture_t; -typedef struct ruvd_enc_quality_params_s -{ +typedef struct ruvd_enc_quality_params_s { uint32_t vbaq_mode; uint32_t scene_change_sensitivity; uint32_t scene_change_min_idr_interval; } ruvd_enc_quality_params_t; -typedef struct ruvd_enc_direct_output_nalu_s -{ +typedef struct ruvd_enc_direct_output_nalu_s { uint32_t type; uint32_t size; uint32_t data[1]; } ruvd_enc_direct_output_nalu_t; -typedef struct ruvd_enc_slice_header_s -{ - uint32_t - bitstream_template - [RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS]; - struct - { +typedef struct ruvd_enc_slice_header_s { + uint32_t bitstream_template[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS]; + struct { uint32_t instruction; uint32_t num_bits; } instructions[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS]; } ruvd_enc_slice_header_t; -typedef struct ruvd_enc_encode_params_s -{ +typedef struct ruvd_enc_encode_params_s { uint32_t pic_type; uint32_t allowed_max_bitstream_size; uint32_t input_picture_luma_address_hi; @@ -273,13 +252,11 @@ typedef struct ruvd_enc_encode_params_s uint32_t input_picture_chroma_address_lo; uint32_t input_pic_luma_pitch; uint32_t input_pic_chroma_pitch; - union - { + union { uint32_t input_pic_addr_mode; uint32_t reserved; }; - union - { + union { uint32_t input_pic_array_mode; uint32_t input_pic_swizzle_mode; }; @@ -287,8 +264,7 @@ typedef struct ruvd_enc_encode_params_s uint32_t reconstructed_picture_index; } ruvd_enc_encode_params_t; -typedef struct ruvd_enc_hevc_deblocking_filter_s -{ +typedef struct ruvd_enc_hevc_deblocking_filter_s { uint32_t loop_filter_across_slices_enabled; int32_t deblocking_filter_disabled; int32_t beta_offset_div2; @@ -297,48 +273,40 @@ typedef struct ruvd_enc_hevc_deblocking_filter_s int32_t cr_qp_offset; } ruvd_enc_hevc_deblocking_filter_t; -typedef struct ruvd_enc_intra_refresh_s -{ +typedef struct ruvd_enc_intra_refresh_s { uint32_t intra_refresh_mode; uint32_t offset; uint32_t region_size; } ruvd_enc_intra_refresh_t; -typedef struct ruvd_enc_reconstructed_picture_s -{ +typedef struct ruvd_enc_reconstructed_picture_s { uint32_t luma_offset; uint32_t chroma_offset; } ruvd_enc_reconstructed_picture_t; -typedef struct ruvd_enc_encode_context_buffer_s -{ +typedef struct ruvd_enc_encode_context_buffer_s { uint32_t encode_context_address_hi; uint32_t encode_context_address_lo; - union - { + union { uint32_t addr_mode; uint32_t reserved; }; - union - { + union { uint32_t array_mode; uint32_t swizzle_mode; }; uint32_t rec_luma_pitch; uint32_t rec_chroma_pitch; uint32_t num_reconstructed_pictures; - ruvd_enc_reconstructed_picture_t - reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; + ruvd_enc_reconstructed_picture_t reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; uint32_t pre_encode_picture_luma_pitch; uint32_t pre_encode_picture_chroma_pitch; ruvd_enc_reconstructed_picture_t - pre_encode_reconstructed_pictures - [RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; + pre_encode_reconstructed_pictures[RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES]; ruvd_enc_reconstructed_picture_t pre_encode_input_picture; } ruvd_enc_encode_context_buffer_t; -typedef struct ruvd_enc_video_bitstream_buffer_s -{ +typedef struct ruvd_enc_video_bitstream_buffer_s { uint32_t mode; uint32_t video_bitstream_buffer_address_hi; uint32_t video_bitstream_buffer_address_lo; @@ -346,8 +314,7 @@ typedef struct ruvd_enc_video_bitstream_buffer_s uint32_t video_bitstream_data_offset; } ruvd_enc_video_bitstream_buffer_t; -typedef struct ruvd_enc_feedback_buffer_s -{ +typedef struct ruvd_enc_feedback_buffer_s { uint32_t mode; uint32_t feedback_buffer_address_hi; uint32_t feedback_buffer_address_lo; @@ -355,20 +322,15 @@ typedef struct ruvd_enc_feedback_buffer_s uint32_t feedback_data_size; } ruvd_enc_feedback_buffer_t; -typedef void (*radeon_uvd_enc_get_buffer) (struct pipe_resource * resource, - struct pb_buffer ** handle, - struct radeon_surf ** surface); +typedef void (*radeon_uvd_enc_get_buffer)(struct pipe_resource *resource, struct pb_buffer **handle, + struct radeon_surf **surface); -struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context - *context, - const struct - pipe_video_codec *templat, +struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context *context, + const struct pipe_video_codec *templat, struct radeon_winsys *ws, - radeon_uvd_enc_get_buffer - get_buffer); + radeon_uvd_enc_get_buffer get_buffer); -struct radeon_uvd_enc_pic -{ +struct radeon_uvd_enc_pic { enum pipe_h265_enc_picture_type picture_type; unsigned frame_num; @@ -422,14 +384,12 @@ struct radeon_uvd_enc_pic ruvd_enc_encode_params_t enc_params; }; -struct radeon_uvd_encoder -{ +struct radeon_uvd_encoder { struct pipe_video_codec base; - void (*begin) (struct radeon_uvd_encoder * enc, - struct pipe_picture_desc * pic); - void (*encode) (struct radeon_uvd_encoder * enc); - void (*destroy) (struct radeon_uvd_encoder * enc); + void (*begin)(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic); + void (*encode)(struct radeon_uvd_encoder *enc); + void (*destroy)(struct radeon_uvd_encoder *enc); unsigned stream_handle; diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c index 7e5be33ec54..029f3a6d3df 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c @@ -25,37 +25,39 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_uvd_enc.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_uvd_enc.h" + +#include <stdio.h> #define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) -#define RADEON_ENC_BEGIN(cmd) { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ -RADEON_ENC_CS(cmd) -#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) -#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) -#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) -#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ - enc->total_task_size += *begin;} - -static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 }; - -static void -radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc, - struct pb_buffer *buf, enum radeon_bo_usage usage, - enum radeon_bo_domain domain, signed offset) +#define RADEON_ENC_BEGIN(cmd) \ + { \ + uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + RADEON_ENC_CS(cmd) +#define RADEON_ENC_READ(buf, domain, off) \ + radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) +#define RADEON_ENC_WRITE(buf, domain, off) \ + radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) +#define RADEON_ENC_READWRITE(buf, domain, off) \ + radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) +#define RADEON_ENC_END() \ + *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + enc->total_task_size += *begin; \ + } + +static const unsigned index_to_shifts[4] = {24, 16, 8, 0}; + +static void radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, + signed offset) { - enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); + enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); addr = addr + offset; @@ -63,9 +65,7 @@ radeon_uvd_enc_add_buffer(struct radeon_uvd_encoder *enc, RADEON_ENC_CS(addr); } -static void -radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc, - bool set) +static void radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc, bool set) { if (set != enc->emulation_prevention) { enc->emulation_prevention = set; @@ -73,14 +73,12 @@ radeon_uvd_enc_set_emulation_prevention(struct radeon_uvd_encoder *enc, } } -static void -radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc, - unsigned char byte) +static void radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc, unsigned char byte) { if (enc->byte_index == 0) enc->cs->current.buf[enc->cs->current.cdw] = 0; enc->cs->current.buf[enc->cs->current.cdw] |= - ((unsigned int) (byte) << index_to_shifts[enc->byte_index]); + ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); enc->byte_index++; if (enc->byte_index >= 4) { @@ -89,14 +87,11 @@ radeon_uvd_enc_output_one_byte(struct radeon_uvd_encoder *enc, } } -static void -radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc, - unsigned char byte) +static void radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc, unsigned char byte) { if (enc->emulation_prevention) { - if ((enc->num_zeros >= 2) - && ((byte == 0x00) || (byte == 0x01) - || (byte == 0x02) || (byte == 0x03))) { + if ((enc->num_zeros >= 2) && + ((byte == 0x00) || (byte == 0x01) || (byte == 0x02) || (byte == 0x03))) { radeon_uvd_enc_output_one_byte(enc, 0x03); enc->bits_output += 8; enc->num_zeros = 0; @@ -105,28 +100,25 @@ radeon_uvd_enc_emulation_prevention(struct radeon_uvd_encoder *enc, } } -static void -radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc, - unsigned int value, unsigned int num_bits) +static void radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc, unsigned int value, + unsigned int num_bits) { unsigned int bits_to_pack = 0; while (num_bits > 0) { unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits)); bits_to_pack = - num_bits > - (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits; + num_bits > (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits; if (bits_to_pack < num_bits) value_to_pack = value_to_pack >> (num_bits - bits_to_pack); - enc->shifter |= - value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack); + enc->shifter |= value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack); num_bits -= bits_to_pack; enc->bits_in_shifter += bits_to_pack; while (enc->bits_in_shifter >= 8) { - unsigned char output_byte = (unsigned char) (enc->shifter >> 24); + unsigned char output_byte = (unsigned char)(enc->shifter >> 24); enc->shifter <<= 8; radeon_uvd_enc_emulation_prevention(enc, output_byte); radeon_uvd_enc_output_one_byte(enc, output_byte); @@ -136,8 +128,7 @@ radeon_uvd_enc_code_fixed_bits(struct radeon_uvd_encoder *enc, } } -static void -radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc) { enc->emulation_prevention = false; enc->shifter = 0; @@ -147,8 +138,7 @@ radeon_uvd_enc_reset(struct radeon_uvd_encoder *enc) enc->byte_index = 0; } -static void -radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc) { unsigned int num_padding_zeros = (32 - enc->bits_in_shifter) % 8; @@ -156,11 +146,10 @@ radeon_uvd_enc_byte_align(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_fixed_bits(enc, 0, num_padding_zeros); } -static void -radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc) { if (enc->bits_in_shifter != 0) { - unsigned char output_byte = (unsigned char) (enc->shifter >> 24); + unsigned char output_byte = (unsigned char)(enc->shifter >> 24); radeon_uvd_enc_emulation_prevention(enc, output_byte); radeon_uvd_enc_output_one_byte(enc, output_byte); enc->bits_output += enc->bits_in_shifter; @@ -175,8 +164,7 @@ radeon_uvd_enc_flush_headers(struct radeon_uvd_encoder *enc) } } -static void -radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value) +static void radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value) { int x = -1; unsigned int ue_code = value + 1; @@ -191,35 +179,29 @@ radeon_uvd_enc_code_ue(struct radeon_uvd_encoder *enc, unsigned int value) radeon_uvd_enc_code_fixed_bits(enc, ue_code, ue_length); } -static void -radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value) +static void radeon_uvd_enc_code_se(struct radeon_uvd_encoder *enc, int value) { unsigned int v = 0; if (value != 0) - v = (value < 0 ? ((unsigned int) (0 - value) << 1) - : (((unsigned int) (value) << 1) - 1)); + v = (value < 0 ? ((unsigned int)(0 - value) << 1) : (((unsigned int)(value) << 1) - 1)); radeon_uvd_enc_code_ue(enc, v); } -static void -radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_session_info(struct radeon_uvd_encoder *enc) { unsigned int interface_version = - ((RENC_UVD_FW_INTERFACE_MAJOR_VERSION << - RENC_UVD_IF_MAJOR_VERSION_SHIFT) | - (RENC_UVD_FW_INTERFACE_MINOR_VERSION << - RENC_UVD_IF_MINOR_VERSION_SHIFT)); + ((RENC_UVD_FW_INTERFACE_MAJOR_VERSION << RENC_UVD_IF_MAJOR_VERSION_SHIFT) | + (RENC_UVD_FW_INTERFACE_MINOR_VERSION << RENC_UVD_IF_MINOR_VERSION_SHIFT)); RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INFO); - RADEON_ENC_CS(0x00000000); // reserved + RADEON_ENC_CS(0x00000000); // reserved RADEON_ENC_CS(interface_version); RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0); RADEON_ENC_END(); } -static void -radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback) +static void radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback) { enc->enc_pic.task_info.task_id++; @@ -235,13 +217,10 @@ radeon_uvd_enc_task_info(struct radeon_uvd_encoder *enc, bool need_feedback) RADEON_ENC_END(); } -static void -radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc) { - enc->enc_pic.session_init.aligned_picture_width = - align(enc->base.width, 64); - enc->enc_pic.session_init.aligned_picture_height = - align(enc->base.height, 16); + enc->enc_pic.session_init.aligned_picture_width = align(enc->base.width, 64); + enc->enc_pic.session_init.aligned_picture_height = align(enc->base.height, 16); enc->enc_pic.session_init.padding_width = enc->enc_pic.session_init.aligned_picture_width - enc->base.width; enc->enc_pic.session_init.padding_height = @@ -259,8 +238,7 @@ radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc) { enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1; enc->enc_pic.layer_ctrl.num_temporal_layers = 1; @@ -271,8 +249,7 @@ radeon_uvd_enc_layer_control(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc) { enc->enc_pic.layer_sel.temporal_layer_index = 0; @@ -281,46 +258,37 @@ radeon_uvd_enc_layer_select(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_slice_control_hevc(struct radeon_uvd_encoder *enc) { - enc->enc_pic.hevc_slice_ctrl.slice_control_mode = - RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS; + enc->enc_pic.hevc_slice_ctrl.slice_control_mode = RENC_UVD_SLICE_CONTROL_MODE_FIXED_CTBS; enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice = align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64; - enc->enc_pic.hevc_slice_ctrl. - fixed_ctbs_per_slice.num_ctbs_per_slice_segment = + enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment = enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_CONTROL); RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode); - RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl. - fixed_ctbs_per_slice.num_ctbs_per_slice); - RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl. - fixed_ctbs_per_slice.num_ctbs_per_slice_segment); + RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice); + RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment); RADEON_ENC_END(); } -static void -radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc, + struct pipe_picture_desc *picture) { - struct pipe_h265_enc_picture_desc *pic = - (struct pipe_h265_enc_picture_desc *) picture; + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 = pic->seq.log2_min_luma_coding_block_size_minus3; enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag; enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled = pic->seq.strong_intra_smoothing_enabled_flag; - enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag = - pic->pic.constrained_intra_pred_flag; + enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag = pic->pic.constrained_intra_pred_flag; enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag; enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1; enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SPEC_MISC); - RADEON_ENC_CS(enc->enc_pic. - hevc_spec_misc.log2_min_luma_coding_block_size_minus3); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled); RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled); RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag); @@ -330,22 +298,18 @@ radeon_uvd_enc_spec_misc_hevc(struct radeon_uvd_encoder *enc, RADEON_ENC_END(); } -static void -radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc, + struct pipe_picture_desc *picture) { - struct pipe_h265_enc_picture_desc *pic = - (struct pipe_h265_enc_picture_desc *) picture; + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv; switch (pic->rc.rate_ctrl_method) { case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE: - enc->enc_pic.rc_session_init.rate_control_method = - RENC_UVD_RATE_CONTROL_METHOD_NONE; + enc->enc_pic.rc_session_init.rate_control_method = RENC_UVD_RATE_CONTROL_METHOD_NONE; break; case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT: - enc->enc_pic.rc_session_init.rate_control_method = - RENC_UVD_RATE_CONTROL_METHOD_CBR; + enc->enc_pic.rc_session_init.rate_control_method = RENC_UVD_RATE_CONTROL_METHOD_CBR; break; case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE: @@ -353,8 +317,7 @@ radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc, RENC_UVD_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; break; default: - enc->enc_pic.rc_session_init.rate_control_method = - RENC_UVD_RATE_CONTROL_METHOD_NONE; + enc->enc_pic.rc_session_init.rate_control_method = RENC_UVD_RATE_CONTROL_METHOD_NONE; } RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_SESSION_INIT); @@ -363,23 +326,18 @@ radeon_uvd_enc_rc_session_init(struct radeon_uvd_encoder *enc, RADEON_ENC_END(); } -static void -radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc, + struct pipe_picture_desc *picture) { - struct pipe_h265_enc_picture_desc *pic = - (struct pipe_h265_enc_picture_desc *) picture; + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->enc_pic.rc_layer_init.target_bit_rate = pic->rc.target_bitrate; enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rc.peak_bitrate; enc->enc_pic.rc_layer_init.frame_rate_num = pic->rc.frame_rate_num; enc->enc_pic.rc_layer_init.frame_rate_den = pic->rc.frame_rate_den; enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size; - enc->enc_pic.rc_layer_init.avg_target_bits_per_picture = - pic->rc.target_bits_picture; - enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer = - pic->rc.peak_bits_picture_integer; - enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = - pic->rc.peak_bits_picture_fraction; + enc->enc_pic.rc_layer_init.avg_target_bits_per_picture = pic->rc.target_bits_picture; + enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer = pic->rc.peak_bits_picture_integer; + enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = pic->rc.peak_bits_picture_fraction; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_RATE_CONTROL_LAYER_INIT); RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate); @@ -393,18 +351,15 @@ radeon_uvd_enc_rc_layer_init(struct radeon_uvd_encoder *enc, RADEON_ENC_END(); } -static void -radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc, + struct pipe_picture_desc *picture) { - struct pipe_h265_enc_picture_desc *pic = - (struct pipe_h265_enc_picture_desc *) picture; + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled = pic->slice.slice_loop_filter_across_slices_enabled_flag; enc->enc_pic.hevc_deblock.deblocking_filter_disabled = pic->slice.slice_deblocking_filter_disabled_flag; - enc->enc_pic.hevc_deblock.beta_offset_div2 = - pic->slice.slice_beta_offset_div2; + enc->enc_pic.hevc_deblock.beta_offset_div2 = pic->slice.slice_beta_offset_div2; enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2; enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset; enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset; @@ -419,8 +374,7 @@ radeon_uvd_enc_deblocking_filter_hevc(struct radeon_uvd_encoder *enc, RADEON_ENC_END(); } -static void -radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc) { enc->enc_pic.quality_params.vbaq_mode = 0; enc->enc_pic.quality_params.scene_change_sensitivity = 0; @@ -433,8 +387,7 @@ radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS); @@ -448,9 +401,7 @@ radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_byte_align(enc); radeon_uvd_enc_set_emulation_prevention(enc, true); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic. - layer_ctrl.max_num_temporal_layers - 1, 3); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); @@ -470,16 +421,13 @@ radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_ue(enc, 0x0); radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.session_init.aligned_picture_width); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.session_init.aligned_picture_height); - - int conformance_window_flag = - (enc->enc_pic.crop_top > 0) || - (enc->enc_pic.crop_bottom > 0) || - (enc->enc_pic.crop_left > 0) || - (enc->enc_pic.crop_right > 0) ? 0x1 : 0x0; + radeon_uvd_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_width); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_height); + + int conformance_window_flag = (enc->enc_pic.crop_top > 0) || (enc->enc_pic.crop_bottom > 0) || + (enc->enc_pic.crop_left > 0) || (enc->enc_pic.crop_right > 0) + ? 0x1 + : 0x0; radeon_uvd_enc_code_fixed_bits(enc, conformance_window_flag, 1); if (conformance_window_flag == 1) { radeon_uvd_enc_code_ue(enc, enc->enc_pic.crop_left); @@ -495,31 +443,18 @@ radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_ue(enc, 1); radeon_uvd_enc_code_ue(enc, 0x0); radeon_uvd_enc_code_ue(enc, 0x0); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.hevc_spec_misc. - log2_min_luma_coding_block_size_minus3); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); /* Only support CTBSize 64 */ - radeon_uvd_enc_code_ue(enc, - 6 - - (enc->enc_pic.hevc_spec_misc. - log2_min_luma_coding_block_size_minus3 + 3)); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.log2_min_transform_block_size_minus2); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic. - log2_diff_max_min_transform_block_size); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.max_transform_hierarchy_depth_inter); - radeon_uvd_enc_code_ue(enc, - enc->enc_pic.max_transform_hierarchy_depth_intra); + radeon_uvd_enc_code_ue( + enc, 6 - (enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3)); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_min_transform_block_size_minus2); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_diff_max_min_transform_block_size); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_inter); + radeon_uvd_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_intra); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, - !enc->enc_pic.hevc_spec_misc.amp_disabled, - 1); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic. - sample_adaptive_offset_enabled_flag, 1); + radeon_uvd_enc_code_fixed_bits(enc, !enc->enc_pic.hevc_spec_misc.amp_disabled, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.sample_adaptive_offset_enabled_flag, 1); radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); radeon_uvd_enc_code_ue(enc, 1); @@ -531,9 +466,8 @@ radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); radeon_uvd_enc_code_fixed_bits(enc, 0, 1); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_spec_misc. - strong_intra_smoothing_enabled, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, + 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); @@ -547,8 +481,7 @@ radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS); @@ -569,12 +502,9 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_ue(enc, 0x0); radeon_uvd_enc_code_ue(enc, 0x0); radeon_uvd_enc_code_se(enc, 0x0); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_spec_misc. - constrained_intra_pred_flag, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - if (enc->enc_pic.rc_session_init.rate_control_method == - RENC_UVD_RATE_CONTROL_METHOD_NONE) + if (enc->enc_pic.rc_session_init.rate_control_method == RENC_UVD_RATE_CONTROL_METHOD_NONE) radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); else { radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); @@ -587,14 +517,11 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_deblock. - loop_filter_across_slices_enabled, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, + 1); radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_deblock. - deblocking_filter_disabled, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.deblocking_filter_disabled, 1); if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); @@ -614,8 +541,7 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS); @@ -632,9 +558,7 @@ radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4); radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.layer_ctrl. - max_num_temporal_layers - 1, 3); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2); @@ -671,8 +595,7 @@ radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD); @@ -709,11 +632,10 @@ radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) { - uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 }; - uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 }; + uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; unsigned int inst_index = 0; unsigned int bit_index = 0; unsigned int bits_copied = 0; @@ -736,8 +658,7 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE; inst_index++; - if ((enc->enc_pic.nal_unit_type >= 16) - && (enc->enc_pic.nal_unit_type <= 23)) + if ((enc->enc_pic.nal_unit_type >= 16) && (enc->enc_pic.nal_unit_type <= 23)) radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); radeon_uvd_enc_code_ue(enc, 0x0); @@ -771,11 +692,8 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) radeon_uvd_enc_code_ue(enc, 0x1); } - if ((enc->enc_pic.nal_unit_type != 19) - && (enc->enc_pic.nal_unit_type != 20)) { - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.pic_order_cnt, - enc->enc_pic.log2_max_poc); + if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); else { @@ -787,14 +705,12 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) } if (enc->enc_pic.sample_adaptive_offset_enabled_flag) - radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* slice_sao_luma_flag */ + radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); /* slice_sao_luma_flag */ if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) || (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) { radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_spec_misc. - cabac_init_flag, 1); + radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.cabac_init_flag, 1); radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand); } @@ -810,9 +726,8 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) && (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) { - radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.hevc_deblock. - loop_filter_across_slices_enabled, 1); + radeon_uvd_enc_code_fixed_bits( + enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); radeon_uvd_enc_flush_headers(enc); bit_index++; @@ -824,12 +739,10 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END; - for (int i = bit_index; - i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) + for (int i = bit_index; i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) RADEON_ENC_CS(0x00000000); - for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; - j++) { + for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { RADEON_ENC_CS(instruction[j]); RADEON_ENC_CS(num_bits[j]); } @@ -837,29 +750,24 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; enc->enc_pic.ctx_buf.swizzle_mode = 0; if (sscreen->info.chip_class < GFX9) { - enc->enc_pic.ctx_buf.rec_luma_pitch = - (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); + enc->enc_pic.ctx_buf.rec_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); enc->enc_pic.ctx_buf.rec_chroma_pitch = (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); - } - else { - enc->enc_pic.ctx_buf.rec_luma_pitch = - enc->luma->u.gfx9.surf_pitch * enc->luma->bpe; - enc->enc_pic.ctx_buf.rec_chroma_pitch = - enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe; + } else { + enc->enc_pic.ctx_buf.rec_luma_pitch = enc->luma->u.gfx9.surf_pitch * enc->luma->bpe; + enc->enc_pic.ctx_buf.rec_chroma_pitch = enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe; } enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_CONTEXT_BUFFER); RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); - RADEON_ENC_CS(0x00000000); // reserved + RADEON_ENC_CS(0x00000000); // reserved RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode); RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch); RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch); @@ -867,14 +775,11 @@ radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) /* reconstructed_picture_1_luma_offset */ RADEON_ENC_CS(0x00000000); /* reconstructed_picture_1_chroma_offset */ - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch * - align(enc->base.height, 16)); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch * align(enc->base.height, 16)); /* reconstructed_picture_2_luma_offset */ - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch * - align(enc->base.height, 16) * 3 / 2); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch * align(enc->base.height, 16) * 3 / 2); /* reconstructed_picture_2_chroma_offset */ - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch * - align(enc->base.height, 16) * 5 / 2); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch * align(enc->base.height, 16) * 5 / 2); for (int i = 0; i < 136; i++) RADEON_ENC_CS(0x00000000); @@ -882,8 +787,7 @@ radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc) { enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR; enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size; @@ -897,8 +801,7 @@ radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc) { enc->enc_pic.fb_buf.mode = RENC_UVD_FEEDBACK_BUFFER_MODE_LINEAR; enc->enc_pic.fb_buf.feedback_buffer_size = 16; @@ -912,11 +815,9 @@ radeon_uvd_enc_feedback(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc) { - enc->enc_pic.intra_ref.intra_refresh_mode = - RENC_UVD_INTRA_REFRESH_MODE_NONE; + enc->enc_pic.intra_ref.intra_refresh_mode = RENC_UVD_INTRA_REFRESH_MODE_NONE; enc->enc_pic.intra_ref.offset = 0; enc->enc_pic.intra_ref.region_size = 0; @@ -927,12 +828,10 @@ radeon_uvd_enc_intra_refresh(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void -radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc, - struct pipe_picture_desc *picture) +static void radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc, + struct pipe_picture_desc *picture) { - struct pipe_h265_enc_picture_desc *pic = - (struct pipe_h265_enc_picture_desc *) picture; + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames; enc->enc_pic.rc_per_pic.min_qp_app = 0; enc->enc_pic.rc_per_pic.max_qp_app = 51; @@ -952,10 +851,9 @@ radeon_uvd_enc_rc_per_pic(struct radeon_uvd_encoder *enc, RADEON_ENC_END(); } -static void -radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; switch (enc->enc_pic.picture_type) { case PIPE_H265_ENC_PICTURE_TYPE_I: case PIPE_H265_ENC_PICTURE_TYPE_IDR: @@ -980,94 +878,77 @@ radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); enc->enc_pic.enc_params.input_pic_chroma_pitch = (enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); - } - else { - enc->enc_pic.enc_params.input_pic_luma_pitch = - enc->luma->u.gfx9.surf_pitch * enc->luma->bpe; + } else { + enc->enc_pic.enc_params.input_pic_luma_pitch = enc->luma->u.gfx9.surf_pitch * enc->luma->bpe; enc->enc_pic.enc_params.input_pic_chroma_pitch = enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe; } - enc->enc_pic.enc_params.input_pic_swizzle_mode = - RENC_UVD_SWIZZLE_MODE_LINEAR; + enc->enc_pic.enc_params.input_pic_swizzle_mode = RENC_UVD_SWIZZLE_MODE_LINEAR; if (enc->enc_pic.enc_params.pic_type == RENC_UVD_PICTURE_TYPE_I) enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; else - enc->enc_pic.enc_params.reference_picture_index = - (enc->enc_pic.frame_num - 1) % 2; + enc->enc_pic.enc_params.reference_picture_index = (enc->enc_pic.frame_num - 1) % 2; - enc->enc_pic.enc_params.reconstructed_picture_index = - enc->enc_pic.frame_num % 2; + enc->enc_pic.enc_params.reconstructed_picture_index = enc->enc_pic.frame_num % 2; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_ENCODE_PARAMS); RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); if (sscreen->info.chip_class < GFX9) { - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); - } - else { - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.gfx9.surf_offset); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.gfx9.surf_offset); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.legacy.level[0].offset); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.legacy.level[0].offset); + } else { + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.gfx9.surf_offset); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.gfx9.surf_offset); } RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch); RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch); - RADEON_ENC_CS(0x00000000); // reserved + RADEON_ENC_CS(0x00000000); // reserved RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode); RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index); RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_init(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INITIALIZE); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_close(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_CLOSE_SESSION); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_enc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_ENCODE); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_init_rc(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_init_rc_vbv(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_INIT_RC_VBV_BUFFER_LEVEL); RADEON_ENC_END(); } -static void -radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc) +static void radeon_uvd_enc_op_speed(struct radeon_uvd_encoder *enc) { RADEON_ENC_BEGIN(RENC_UVD_IB_OP_SET_SPEED_ENCODING_MODE); RADEON_ENC_END(); } -static void -begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic) +static void begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic) { radeon_uvd_enc_session_info(enc); enc->total_task_size = 0; @@ -1091,8 +972,7 @@ begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic) *enc->p_task_size = (enc->total_task_size); } -static void -encode(struct radeon_uvd_encoder *enc) +static void encode(struct radeon_uvd_encoder *enc) { radeon_uvd_enc_session_info(enc); enc->total_task_size = 0; @@ -1118,8 +998,7 @@ encode(struct radeon_uvd_encoder *enc) *enc->p_task_size = (enc->total_task_size); } -static void -destroy(struct radeon_uvd_encoder *enc) +static void destroy(struct radeon_uvd_encoder *enc) { radeon_uvd_enc_session_info(enc); enc->total_task_size = 0; @@ -1128,8 +1007,7 @@ destroy(struct radeon_uvd_encoder *enc) *enc->p_task_size = (enc->total_task_size); } -void -radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc) +void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc) { enc->begin = begin; enc->encode = encode; diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 66b4744a558..da831015e3b 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -25,37 +25,35 @@ * **************************************************************************/ -#include <stdio.h> +#include "radeon_vce.h" #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> -#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) -#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) -#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) +#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) +#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) +#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) #define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) #define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) -#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) -#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) -#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) -#define FW_53 (53 << 24) +#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) +#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) +#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) +#define FW_53 (53 << 24) /** * flush commands to the hardware */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); - enc->task_info_idx = 0; - enc->bs_idx = 0; + enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->task_info_idx = 0; + enc->bs_idx = 0; } #if 0 @@ -89,17 +87,17 @@ static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) */ static void reset_cpb(struct rvce_encoder *enc) { - unsigned i; - - list_inithead(&enc->cpb_slots); - for (i = 0; i < enc->cpb_num; ++i) { - struct rvce_cpb_slot *slot = &enc->cpb_array[i]; - slot->index = i; - slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; - slot->frame_num = 0; - slot->pic_order_cnt = 0; - list_addtail(&slot->list, &enc->cpb_slots); - } + unsigned i; + + list_inithead(&enc->cpb_slots); + for (i = 0; i < enc->cpb_num; ++i) { + struct rvce_cpb_slot *slot = &enc->cpb_array[i]; + slot->index = i; + slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; + slot->frame_num = 0; + slot->pic_order_cnt = 0; + list_addtail(&slot->list, &enc->cpb_slots); + } } /** @@ -107,32 +105,31 @@ static void reset_cpb(struct rvce_encoder *enc) */ static void sort_cpb(struct rvce_encoder *enc) { - struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; + struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; - LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) { - if (i->frame_num == enc->pic.ref_idx_l0) - l0 = i; + LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) { + if (i->frame_num == enc->pic.ref_idx_l0) + l0 = i; - if (i->frame_num == enc->pic.ref_idx_l1) - l1 = i; + if (i->frame_num == enc->pic.ref_idx_l1) + l1 = i; - if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) - break; + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) + break; - if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && - l0 && l1) - break; - } + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && l0 && l1) + break; + } - if (l1) { - list_del(&l1->list); - list_add(&l1->list, &enc->cpb_slots); - } + if (l1) { + list_del(&l1->list); + list_add(&l1->list, &enc->cpb_slots); + } - if (l0) { - list_del(&l0->list); - list_add(&l0->list, &enc->cpb_slots); - } + if (l0) { + list_del(&l0->list); + list_add(&l0->list, &enc->cpb_slots); + } } /** @@ -140,53 +137,53 @@ static void sort_cpb(struct rvce_encoder *enc) */ static unsigned get_cpb_num(struct rvce_encoder *enc) { - unsigned w = align(enc->base.width, 16) / 16; - unsigned h = align(enc->base.height, 16) / 16; - unsigned dpb; - - switch (enc->base.level) { - case 10: - dpb = 396; - break; - case 11: - dpb = 900; - break; - case 12: - case 13: - case 20: - dpb = 2376; - break; - case 21: - dpb = 4752; - break; - case 22: - case 30: - dpb = 8100; - break; - case 31: - dpb = 18000; - break; - case 32: - dpb = 20480; - break; - case 40: - case 41: - dpb = 32768; - break; - case 42: - dpb = 34816; - break; - case 50: - dpb = 110400; - break; - default: - case 51: - case 52: - dpb = 184320; - break; - } - - return MIN2(dpb / (w * h), 16); + unsigned w = align(enc->base.width, 16) / 16; + unsigned h = align(enc->base.height, 16) / 16; + unsigned dpb; + + switch (enc->base.level) { + case 10: + dpb = 396; + break; + case 11: + dpb = 900; + break; + case 12: + case 13: + case 20: + dpb = 2376; + break; + case 21: + dpb = 4752; + break; + case 22: + case 30: + dpb = 8100; + break; + case 31: + dpb = 18000; + break; + case 32: + dpb = 20480; + break; + case 40: + case 41: + dpb = 32768; + break; + case 42: + dpb = 34816; + break; + case 50: + dpb = 110400; + break; + default: + case 51: + case 52: + dpb = 184320; + break; + } + + return MIN2(dpb / (w * h), 16); } /** @@ -194,7 +191,7 @@ static unsigned get_cpb_num(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); } /** @@ -202,7 +199,7 @@ struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); } /** @@ -210,29 +207,29 @@ struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); } /** * Calculate the offsets into the CPB */ -void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, - signed *luma_offset, signed *chroma_offset) +void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, + signed *chroma_offset) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - unsigned pitch, vpitch, fsize; - - if (sscreen->info.chip_class < GFX9) { - pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); - vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); - } else { - pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); - vpitch = align(enc->luma->u.gfx9.surf_height, 16); - } - fsize = pitch * (vpitch + vpitch / 2); - - *luma_offset = slot->index * fsize; - *chroma_offset = *luma_offset + pitch * vpitch; + struct si_screen *sscreen = (struct si_screen *)enc->screen; + unsigned pitch, vpitch, fsize; + + if (sscreen->info.chip_class < GFX9) { + pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); + vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); + } else { + pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); + vpitch = align(enc->luma->u.gfx9.surf_height, 16); + } + fsize = pitch * (vpitch + vpitch / 2); + + *luma_offset = slot->index * fsize; + *chroma_offset = *luma_offset + pitch * vpitch; } /** @@ -240,134 +237,128 @@ void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, */ static void rvce_destroy(struct pipe_video_codec *encoder) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - if (enc->stream_handle) { - struct rvid_buffer fb; - si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->session(enc); - enc->destroy(enc); - flush(enc); - si_vid_destroy_buffer(&fb); - } - si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); - FREE(enc->cpb_array); - FREE(enc); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + if (enc->stream_handle) { + struct rvid_buffer fb; + si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->destroy(enc); + flush(enc); + si_vid_destroy_buffer(&fb); + } + si_vid_destroy_buffer(&enc->cpb); + enc->ws->cs_destroy(enc->cs); + FREE(enc->cpb_array); + FREE(enc); } -static void rvce_begin_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; - struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; - - bool need_rate_control = - enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method || - enc->pic.quant_i_frames != pic->quant_i_frames || - enc->pic.quant_p_frames != pic->quant_p_frames || - enc->pic.quant_b_frames != pic->quant_b_frames || - enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate; - - enc->pic = *pic; - enc->si_get_pic_param(enc, pic); - - enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); - enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); - - if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - reset_cpb(enc); - else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) - sort_cpb(enc); - - if (!enc->stream_handle) { - struct rvid_buffer fb; - enc->stream_handle = si_vid_alloc_stream_handle(); - si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->session(enc); - enc->create(enc); - enc->config(enc); - enc->feedback(enc); - flush(enc); - //dump_feedback(enc, &fb); - si_vid_destroy_buffer(&fb); - need_rate_control = false; - } - - if (need_rate_control) { - enc->session(enc); - enc->config(enc); - flush(enc); - } + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; + struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; + + bool need_rate_control = + enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method || + enc->pic.quant_i_frames != pic->quant_i_frames || + enc->pic.quant_p_frames != pic->quant_p_frames || + enc->pic.quant_b_frames != pic->quant_b_frames || + enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate; + + enc->pic = *pic; + enc->si_get_pic_param(enc, pic); + + enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); + enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); + + if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + reset_cpb(enc); + else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) + sort_cpb(enc); + + if (!enc->stream_handle) { + struct rvid_buffer fb; + enc->stream_handle = si_vid_alloc_stream_handle(); + si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->create(enc); + enc->config(enc); + enc->feedback(enc); + flush(enc); + // dump_feedback(enc, &fb); + si_vid_destroy_buffer(&fb); + need_rate_control = false; + } + + if (need_rate_control) { + enc->session(enc); + enc->config(enc); + flush(enc); + } } static void rvce_encode_bitstream(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_resource *destination, - void **fb) + struct pipe_video_buffer *source, + struct pipe_resource *destination, void **fb) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - enc->get_buffer(destination, &enc->bs_handle, NULL); - enc->bs_size = destination->width0; - - *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); - if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't create feedback buffer.\n"); - return; - } - if (!radeon_emitted(enc->cs, 0)) - enc->session(enc); - enc->encode(enc); - enc->feedback(enc); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + enc->get_buffer(destination, &enc->bs_handle, NULL); + enc->bs_size = destination->width0; + + *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); + if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't create feedback buffer.\n"); + return; + } + if (!radeon_emitted(enc->cs, 0)) + enc->session(enc); + enc->encode(enc); + enc->feedback(enc); } -static void rvce_end_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct rvce_cpb_slot *slot = LIST_ENTRY( - struct rvce_cpb_slot, enc->cpb_slots.prev, list); - - if (!enc->dual_inst || enc->bs_idx > 1) - flush(enc); - - /* update the CPB backtrack with the just encoded frame */ - slot->picture_type = enc->pic.picture_type; - slot->frame_num = enc->pic.frame_num; - slot->pic_order_cnt = enc->pic.pic_order_cnt; - if (!enc->pic.not_referenced) { - list_del(&slot->list); - list_add(&slot->list, &enc->cpb_slots); - } + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct rvce_cpb_slot *slot = LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); + + if (!enc->dual_inst || enc->bs_idx > 1) + flush(enc); + + /* update the CPB backtrack with the just encoded frame */ + slot->picture_type = enc->pic.picture_type; + slot->frame_num = enc->pic.frame_num; + slot->pic_order_cnt = enc->pic.pic_order_cnt; + if (!enc->pic.not_referenced) { + list_del(&slot->list); + list_add(&slot->list, &enc->cpb_slots); + } } -static void rvce_get_feedback(struct pipe_video_codec *encoder, - void *feedback, unsigned *size) +static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct rvid_buffer *fb = feedback; - - if (size) { - uint32_t *ptr = enc->ws->buffer_map( - fb->res->buf, enc->cs, - PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); - - if (ptr[1]) { - *size = ptr[4] - ptr[9]; - } else { - *size = 0; - } - - enc->ws->buffer_unmap(fb->res->buf); - } - //dump_feedback(enc, fb); - si_vid_destroy_buffer(fb); - FREE(fb); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct rvid_buffer *fb = feedback; + + if (size) { + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); + + if (ptr[1]) { + *size = ptr[4] - ptr[9]; + } else { + *size = 0; + } + + enc->ws->buffer_unmap(fb->res->buf); + } + // dump_feedback(enc, fb); + si_vid_destroy_buffer(fb); + FREE(fb); } /** @@ -375,153 +366,147 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, */ static void rvce_flush(struct pipe_video_codec *encoder) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; - flush(enc); + flush(enc); } -static void rvce_cs_flush(void *ctx, unsigned flags, - struct pipe_fence_handle **fence) +static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { - // just ignored + // just ignored } struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - struct radeon_winsys* ws, - rvce_get_buffer get_buffer) + const struct pipe_video_codec *templ, + struct radeon_winsys *ws, rvce_get_buffer get_buffer) { - struct si_screen *sscreen = (struct si_screen *)context->screen; - struct si_context *sctx = (struct si_context*)context; - struct rvce_encoder *enc; - struct pipe_video_buffer *tmp_buf, templat = {}; - struct radeon_surf *tmp_surf; - unsigned cpb_size; - - if (!sscreen->info.vce_fw_version) { - RVID_ERR("Kernel doesn't supports VCE!\n"); - return NULL; - - } else if (!si_vce_is_fw_version_supported(sscreen)) { - RVID_ERR("Unsupported VCE fw version loaded!\n"); - return NULL; - } - - enc = CALLOC_STRUCT(rvce_encoder); - if (!enc) - return NULL; - - if (sscreen->info.is_amdgpu) - enc->use_vm = true; - if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || - sscreen->info.is_amdgpu) - enc->use_vui = true; - if (sscreen->info.family >= CHIP_TONGA && - sscreen->info.family != CHIP_STONEY && - sscreen->info.family != CHIP_POLARIS11 && - sscreen->info.family != CHIP_POLARIS12 && - sscreen->info.family != CHIP_VEGAM) - enc->dual_pipe = true; - /* TODO enable B frame with dual instance */ - if ((sscreen->info.family >= CHIP_TONGA) && - (templ->max_references == 1) && - (sscreen->info.vce_harvest_config == 0)) - enc->dual_inst = true; - - enc->base = *templ; - enc->base.context = context; - - enc->base.destroy = rvce_destroy; - enc->base.begin_frame = rvce_begin_frame; - enc->base.encode_bitstream = rvce_encode_bitstream; - enc->base.end_frame = rvce_end_frame; - enc->base.flush = rvce_flush; - enc->base.get_feedback = rvce_get_feedback; - enc->get_buffer = get_buffer; - - enc->screen = context->screen; - enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); - if (!enc->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - templat.buffer_format = PIPE_FORMAT_NV12; - templat.width = enc->base.width; - templat.height = enc->base.height; - templat.interlaced = false; - if (!(tmp_buf = context->create_video_buffer(context, &templat))) { - RVID_ERR("Can't create video buffer.\n"); - goto error; - } - - enc->cpb_num = get_cpb_num(enc); - if (!enc->cpb_num) - goto error; - - get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - - cpb_size = (sscreen->info.chip_class < GFX9) ? - align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * - align(tmp_surf->u.legacy.level[0].nblk_y, 32) : - - align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * - align(tmp_surf->u.gfx9.surf_height, 32); - - cpb_size = cpb_size * 3 / 2; - cpb_size = cpb_size * enc->cpb_num; - if (enc->dual_pipe) - cpb_size += RVCE_MAX_AUX_BUFFER_NUM * - RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - tmp_buf->destroy(tmp_buf); - if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't create CPB buffer.\n"); - goto error; - } - - enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); - if (!enc->cpb_array) - goto error; - - reset_cpb(enc); - - switch (sscreen->info.vce_fw_version) { - case FW_40_2_2: - si_vce_40_2_2_init(enc); - break; - - case FW_50_0_1: - case FW_50_1_2: - case FW_50_10_2: - case FW_50_17_3: - si_vce_50_init(enc); - break; - - case FW_52_0_3: - case FW_52_4_3: - case FW_52_8_3: - si_vce_52_init(enc); - break; - - default: - if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { - si_vce_52_init(enc); - } else - goto error; - } - - return &enc->base; + struct si_screen *sscreen = (struct si_screen *)context->screen; + struct si_context *sctx = (struct si_context *)context; + struct rvce_encoder *enc; + struct pipe_video_buffer *tmp_buf, templat = {}; + struct radeon_surf *tmp_surf; + unsigned cpb_size; + + if (!sscreen->info.vce_fw_version) { + RVID_ERR("Kernel doesn't supports VCE!\n"); + return NULL; + + } else if (!si_vce_is_fw_version_supported(sscreen)) { + RVID_ERR("Unsupported VCE fw version loaded!\n"); + return NULL; + } + + enc = CALLOC_STRUCT(rvce_encoder); + if (!enc) + return NULL; + + if (sscreen->info.is_amdgpu) + enc->use_vm = true; + if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || sscreen->info.is_amdgpu) + enc->use_vui = true; + if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY && + sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 && + sscreen->info.family != CHIP_VEGAM) + enc->dual_pipe = true; + /* TODO enable B frame with dual instance */ + if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) && + (sscreen->info.vce_harvest_config == 0)) + enc->dual_inst = true; + + enc->base = *templ; + enc->base.context = context; + + enc->base.destroy = rvce_destroy; + enc->base.begin_frame = rvce_begin_frame; + enc->base.encode_bitstream = rvce_encode_bitstream; + enc->base.end_frame = rvce_end_frame; + enc->base.flush = rvce_flush; + enc->base.get_feedback = rvce_get_feedback; + enc->get_buffer = get_buffer; + + enc->screen = context->screen; + enc->ws = ws; + enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); + if (!enc->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + templat.buffer_format = PIPE_FORMAT_NV12; + templat.width = enc->base.width; + templat.height = enc->base.height; + templat.interlaced = false; + if (!(tmp_buf = context->create_video_buffer(context, &templat))) { + RVID_ERR("Can't create video buffer.\n"); + goto error; + } + + enc->cpb_num = get_cpb_num(enc); + if (!enc->cpb_num) + goto error; + + get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); + + cpb_size = (sscreen->info.chip_class < GFX9) + ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32) + : + + align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * + align(tmp_surf->u.gfx9.surf_height, 32); + + cpb_size = cpb_size * 3 / 2; + cpb_size = cpb_size * enc->cpb_num; + if (enc->dual_pipe) + cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + tmp_buf->destroy(tmp_buf); + if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't create CPB buffer.\n"); + goto error; + } + + enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); + if (!enc->cpb_array) + goto error; + + reset_cpb(enc); + + switch (sscreen->info.vce_fw_version) { + case FW_40_2_2: + si_vce_40_2_2_init(enc); + break; + + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + si_vce_50_init(enc); + break; + + case FW_52_0_3: + case FW_52_4_3: + case FW_52_8_3: + si_vce_52_init(enc); + break; + + default: + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { + si_vce_52_init(enc); + } else + goto error; + } + + return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + if (enc->cs) + enc->ws->cs_destroy(enc->cs); - si_vid_destroy_buffer(&enc->cpb); + si_vid_destroy_buffer(&enc->cpb); - FREE(enc->cpb_array); - FREE(enc); - return NULL; + FREE(enc->cpb_array); + FREE(enc); + return NULL; } /** @@ -529,44 +514,42 @@ error: */ bool si_vce_is_fw_version_supported(struct si_screen *sscreen) { - switch (sscreen->info.vce_fw_version) { - case FW_40_2_2: - case FW_50_0_1: - case FW_50_1_2: - case FW_50_10_2: - case FW_50_17_3: - case FW_52_0_3: - case FW_52_4_3: - case FW_52_8_3: - return true; - default: - if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) - return true; - else - return false; - } + switch (sscreen->info.vce_fw_version) { + case FW_40_2_2: + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + case FW_52_0_3: + case FW_52_4_3: + case FW_52_8_3: + return true; + default: + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) + return true; + else + return false; + } } /** * Add the buffer as relocation to the current command submission */ -void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset) +void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, + enum radeon_bo_domain domain, signed offset) { - int reloc_idx; - - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - if (enc->use_vm) { - uint64_t addr; - addr = enc->ws->buffer_get_virtual_address(buf); - addr = addr + offset; - RVCE_CS(addr >> 32); - RVCE_CS(addr); - } else { - offset += enc->ws->buffer_get_reloc_offset(buf); - RVCE_CS(reloc_idx * 4); - RVCE_CS(offset); - } + int reloc_idx; + + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + if (enc->use_vm) { + uint64_t addr; + addr = enc->ws->buffer_get_virtual_address(buf); + addr = addr + offset; + RVCE_CS(addr >> 32); + RVCE_CS(addr); + } else { + offset += enc->ws->buffer_get_reloc_offset(buf); + RVCE_CS(reloc_idx * 4); + RVCE_CS(offset); + } } diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 8550cc26f17..7ee0500e130 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -28,408 +28,412 @@ #ifndef RADEON_VCE_H #define RADEON_VCE_H -#include "util/list.h" #include "radeon_video.h" +#include "util/list.h" #define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) -#define RVCE_BEGIN(cmd) { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ - RVCE_CS(cmd) -#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) -#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) -#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) -#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } +#define RVCE_BEGIN(cmd) \ + { \ + uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + RVCE_CS(cmd) +#define RVCE_READ(buf, domain, off) \ + si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) +#define RVCE_WRITE(buf, domain, off) \ + si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) +#define RVCE_READWRITE(buf, domain, off) \ + si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) +#define RVCE_END() \ + *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + } #define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) -#define RVCE_MAX_AUX_BUFFER_NUM 4 +#define RVCE_MAX_AUX_BUFFER_NUM 4 struct si_screen; /* driver dependent callback */ -typedef void (*rvce_get_buffer)(struct pipe_resource *resource, - struct pb_buffer **handle, - struct radeon_surf **surface); +typedef void (*rvce_get_buffer)(struct pipe_resource *resource, struct pb_buffer **handle, + struct radeon_surf **surface); /* Coded picture buffer slot */ struct rvce_cpb_slot { - struct list_head list; + struct list_head list; - unsigned index; - enum pipe_h264_enc_picture_type picture_type; - unsigned frame_num; - unsigned pic_order_cnt; + unsigned index; + enum pipe_h264_enc_picture_type picture_type; + unsigned frame_num; + unsigned pic_order_cnt; }; struct rvce_rate_control { - uint32_t rc_method; - uint32_t target_bitrate; - uint32_t peak_bitrate; - uint32_t frame_rate_num; - uint32_t gop_size; - uint32_t quant_i_frames; - uint32_t quant_p_frames; - uint32_t quant_b_frames; - uint32_t vbv_buffer_size; - uint32_t frame_rate_den; - uint32_t vbv_buf_lv; - uint32_t max_au_size; - uint32_t qp_initial_mode; - uint32_t target_bits_picture; - uint32_t peak_bits_picture_integer; - uint32_t peak_bits_picture_fraction; - uint32_t min_qp; - uint32_t max_qp; - uint32_t skip_frame_enable; - uint32_t fill_data_enable; - uint32_t enforce_hrd; - uint32_t b_pics_delta_qp; - uint32_t ref_b_pics_delta_qp; - uint32_t rc_reinit_disable; - uint32_t enc_lcvbr_init_qp_flag; - uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag; + uint32_t rc_method; + uint32_t target_bitrate; + uint32_t peak_bitrate; + uint32_t frame_rate_num; + uint32_t gop_size; + uint32_t quant_i_frames; + uint32_t quant_p_frames; + uint32_t quant_b_frames; + uint32_t vbv_buffer_size; + uint32_t frame_rate_den; + uint32_t vbv_buf_lv; + uint32_t max_au_size; + uint32_t qp_initial_mode; + uint32_t target_bits_picture; + uint32_t peak_bits_picture_integer; + uint32_t peak_bits_picture_fraction; + uint32_t min_qp; + uint32_t max_qp; + uint32_t skip_frame_enable; + uint32_t fill_data_enable; + uint32_t enforce_hrd; + uint32_t b_pics_delta_qp; + uint32_t ref_b_pics_delta_qp; + uint32_t rc_reinit_disable; + uint32_t enc_lcvbr_init_qp_flag; + uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag; }; struct rvce_motion_estimation { - uint32_t enc_ime_decimation_search; - uint32_t motion_est_half_pixel; - uint32_t motion_est_quarter_pixel; - uint32_t disable_favor_pmv_point; - uint32_t force_zero_point_center; - uint32_t lsmvert; - uint32_t enc_search_range_x; - uint32_t enc_search_range_y; - uint32_t enc_search1_range_x; - uint32_t enc_search1_range_y; - uint32_t disable_16x16_frame1; - uint32_t disable_satd; - uint32_t enable_amd; - uint32_t enc_disable_sub_mode; - uint32_t enc_ime_skip_x; - uint32_t enc_ime_skip_y; - uint32_t enc_en_ime_overw_dis_subm; - uint32_t enc_ime_overw_dis_subm_no; - uint32_t enc_ime2_search_range_x; - uint32_t enc_ime2_search_range_y; - uint32_t parallel_mode_speedup_enable; - uint32_t fme0_enc_disable_sub_mode; - uint32_t fme1_enc_disable_sub_mode; - uint32_t ime_sw_speedup_enable; + uint32_t enc_ime_decimation_search; + uint32_t motion_est_half_pixel; + uint32_t motion_est_quarter_pixel; + uint32_t disable_favor_pmv_point; + uint32_t force_zero_point_center; + uint32_t lsmvert; + uint32_t enc_search_range_x; + uint32_t enc_search_range_y; + uint32_t enc_search1_range_x; + uint32_t enc_search1_range_y; + uint32_t disable_16x16_frame1; + uint32_t disable_satd; + uint32_t enable_amd; + uint32_t enc_disable_sub_mode; + uint32_t enc_ime_skip_x; + uint32_t enc_ime_skip_y; + uint32_t enc_en_ime_overw_dis_subm; + uint32_t enc_ime_overw_dis_subm_no; + uint32_t enc_ime2_search_range_x; + uint32_t enc_ime2_search_range_y; + uint32_t parallel_mode_speedup_enable; + uint32_t fme0_enc_disable_sub_mode; + uint32_t fme1_enc_disable_sub_mode; + uint32_t ime_sw_speedup_enable; }; struct rvce_pic_control { - uint32_t enc_use_constrained_intra_pred; - uint32_t enc_cabac_enable; - uint32_t enc_cabac_idc; - uint32_t enc_loop_filter_disable; - int32_t enc_lf_beta_offset; - int32_t enc_lf_alpha_c0_offset; - uint32_t enc_crop_left_offset; - uint32_t enc_crop_right_offset; - uint32_t enc_crop_top_offset; - uint32_t enc_crop_bottom_offset; - uint32_t enc_num_mbs_per_slice; - uint32_t enc_intra_refresh_num_mbs_per_slot; - uint32_t enc_force_intra_refresh; - uint32_t enc_force_imb_period; - uint32_t enc_pic_order_cnt_type; - uint32_t log2_max_pic_order_cnt_lsb_minus4; - uint32_t enc_sps_id; - uint32_t enc_pps_id; - uint32_t enc_constraint_set_flags; - uint32_t enc_b_pic_pattern; - uint32_t weight_pred_mode_b_picture; - uint32_t enc_number_of_reference_frames; - uint32_t enc_max_num_ref_frames; - uint32_t enc_num_default_active_ref_l0; - uint32_t enc_num_default_active_ref_l1; - uint32_t enc_slice_mode; - uint32_t enc_max_slice_size; + uint32_t enc_use_constrained_intra_pred; + uint32_t enc_cabac_enable; + uint32_t enc_cabac_idc; + uint32_t enc_loop_filter_disable; + int32_t enc_lf_beta_offset; + int32_t enc_lf_alpha_c0_offset; + uint32_t enc_crop_left_offset; + uint32_t enc_crop_right_offset; + uint32_t enc_crop_top_offset; + uint32_t enc_crop_bottom_offset; + uint32_t enc_num_mbs_per_slice; + uint32_t enc_intra_refresh_num_mbs_per_slot; + uint32_t enc_force_intra_refresh; + uint32_t enc_force_imb_period; + uint32_t enc_pic_order_cnt_type; + uint32_t log2_max_pic_order_cnt_lsb_minus4; + uint32_t enc_sps_id; + uint32_t enc_pps_id; + uint32_t enc_constraint_set_flags; + uint32_t enc_b_pic_pattern; + uint32_t weight_pred_mode_b_picture; + uint32_t enc_number_of_reference_frames; + uint32_t enc_max_num_ref_frames; + uint32_t enc_num_default_active_ref_l0; + uint32_t enc_num_default_active_ref_l1; + uint32_t enc_slice_mode; + uint32_t enc_max_slice_size; }; struct rvce_task_info { - uint32_t offset_of_next_task_info; - uint32_t task_operation; - uint32_t reference_picture_dependency; - uint32_t collocate_flag_dependency; - uint32_t feedback_index; - uint32_t video_bitstream_ring_index; + uint32_t offset_of_next_task_info; + uint32_t task_operation; + uint32_t reference_picture_dependency; + uint32_t collocate_flag_dependency; + uint32_t feedback_index; + uint32_t video_bitstream_ring_index; }; struct rvce_feedback_buf_pkg { - uint32_t feedback_ring_address_hi; - uint32_t feedback_ring_address_lo; - uint32_t feedback_ring_size; + uint32_t feedback_ring_address_hi; + uint32_t feedback_ring_address_lo; + uint32_t feedback_ring_size; }; struct rvce_rdo { - uint32_t enc_disable_tbe_pred_i_frame; - uint32_t enc_disable_tbe_pred_p_frame; - uint32_t use_fme_interpol_y; - uint32_t use_fme_interpol_uv; - uint32_t use_fme_intrapol_y; - uint32_t use_fme_intrapol_uv; - uint32_t use_fme_interpol_y_1; - uint32_t use_fme_interpol_uv_1; - uint32_t use_fme_intrapol_y_1; - uint32_t use_fme_intrapol_uv_1; - uint32_t enc_16x16_cost_adj; - uint32_t enc_skip_cost_adj; - uint32_t enc_force_16x16_skip; - uint32_t enc_disable_threshold_calc_a; - uint32_t enc_luma_coeff_cost; - uint32_t enc_luma_mb_coeff_cost; - uint32_t enc_chroma_coeff_cost; + uint32_t enc_disable_tbe_pred_i_frame; + uint32_t enc_disable_tbe_pred_p_frame; + uint32_t use_fme_interpol_y; + uint32_t use_fme_interpol_uv; + uint32_t use_fme_intrapol_y; + uint32_t use_fme_intrapol_uv; + uint32_t use_fme_interpol_y_1; + uint32_t use_fme_interpol_uv_1; + uint32_t use_fme_intrapol_y_1; + uint32_t use_fme_intrapol_uv_1; + uint32_t enc_16x16_cost_adj; + uint32_t enc_skip_cost_adj; + uint32_t enc_force_16x16_skip; + uint32_t enc_disable_threshold_calc_a; + uint32_t enc_luma_coeff_cost; + uint32_t enc_luma_mb_coeff_cost; + uint32_t enc_chroma_coeff_cost; }; struct rvce_vui { - uint32_t aspect_ratio_info_present_flag; - uint32_t aspect_ratio_idc; - uint32_t sar_width; - uint32_t sar_height; - uint32_t overscan_info_present_flag; - uint32_t overscan_Approp_flag; - uint32_t video_signal_type_present_flag; - uint32_t video_format; - uint32_t video_full_range_flag; - uint32_t color_description_present_flag; - uint32_t color_prim; - uint32_t transfer_char; - uint32_t matrix_coef; - uint32_t chroma_loc_info_present_flag; - uint32_t chroma_loc_top; - uint32_t chroma_loc_bottom; - uint32_t timing_info_present_flag; - uint32_t num_units_in_tick; - uint32_t time_scale; - uint32_t fixed_frame_rate_flag; - uint32_t nal_hrd_parameters_present_flag; - uint32_t cpb_cnt_minus1; - uint32_t bit_rate_scale; - uint32_t cpb_size_scale; - uint32_t bit_rate_value_minus; - uint32_t cpb_size_value_minus; - uint32_t cbr_flag; - uint32_t initial_cpb_removal_delay_length_minus1; - uint32_t cpb_removal_delay_length_minus1; - uint32_t dpb_output_delay_length_minus1; - uint32_t time_offset_length; - uint32_t low_delay_hrd_flag; - uint32_t pic_struct_present_flag; - uint32_t bitstream_restriction_present_flag; - uint32_t motion_vectors_over_pic_boundaries_flag; - uint32_t max_bytes_per_pic_denom; - uint32_t max_bits_per_mb_denom; - uint32_t log2_max_mv_length_hori; - uint32_t log2_max_mv_length_vert; - uint32_t num_reorder_frames; - uint32_t max_dec_frame_buffering; + uint32_t aspect_ratio_info_present_flag; + uint32_t aspect_ratio_idc; + uint32_t sar_width; + uint32_t sar_height; + uint32_t overscan_info_present_flag; + uint32_t overscan_Approp_flag; + uint32_t video_signal_type_present_flag; + uint32_t video_format; + uint32_t video_full_range_flag; + uint32_t color_description_present_flag; + uint32_t color_prim; + uint32_t transfer_char; + uint32_t matrix_coef; + uint32_t chroma_loc_info_present_flag; + uint32_t chroma_loc_top; + uint32_t chroma_loc_bottom; + uint32_t timing_info_present_flag; + uint32_t num_units_in_tick; + uint32_t time_scale; + uint32_t fixed_frame_rate_flag; + uint32_t nal_hrd_parameters_present_flag; + uint32_t cpb_cnt_minus1; + uint32_t bit_rate_scale; + uint32_t cpb_size_scale; + uint32_t bit_rate_value_minus; + uint32_t cpb_size_value_minus; + uint32_t cbr_flag; + uint32_t initial_cpb_removal_delay_length_minus1; + uint32_t cpb_removal_delay_length_minus1; + uint32_t dpb_output_delay_length_minus1; + uint32_t time_offset_length; + uint32_t low_delay_hrd_flag; + uint32_t pic_struct_present_flag; + uint32_t bitstream_restriction_present_flag; + uint32_t motion_vectors_over_pic_boundaries_flag; + uint32_t max_bytes_per_pic_denom; + uint32_t max_bits_per_mb_denom; + uint32_t log2_max_mv_length_hori; + uint32_t log2_max_mv_length_vert; + uint32_t num_reorder_frames; + uint32_t max_dec_frame_buffering; }; struct rvce_enc_operation { - uint32_t insert_headers; - uint32_t picture_structure; - uint32_t allowed_max_bitstream_size; - uint32_t force_refresh_map; - uint32_t insert_aud; - uint32_t end_of_sequence; - uint32_t end_of_stream; - uint32_t input_picture_luma_address_hi; - uint32_t input_picture_luma_address_lo; - uint32_t input_picture_chroma_address_hi; - uint32_t input_picture_chroma_address_lo; - uint32_t enc_input_frame_y_pitch; - uint32_t enc_input_pic_luma_pitch; - uint32_t enc_input_pic_chroma_pitch;; - uint32_t enc_input_pic_addr_array; - uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload; - uint32_t enc_input_pic_tile_config; - uint32_t enc_pic_type; - uint32_t enc_idr_flag; - uint32_t enc_idr_pic_id; - uint32_t enc_mgs_key_pic; - uint32_t enc_reference_flag; - uint32_t enc_temporal_layer_index; - uint32_t num_ref_idx_active_override_flag; - uint32_t num_ref_idx_l0_active_minus1; - uint32_t num_ref_idx_l1_active_minus1; - uint32_t enc_ref_list_modification_op; - uint32_t enc_ref_list_modification_num; - uint32_t enc_decoded_picture_marking_op; - uint32_t enc_decoded_picture_marking_num; - uint32_t enc_decoded_picture_marking_idx; - uint32_t enc_decoded_ref_base_picture_marking_op; - uint32_t enc_decoded_ref_base_picture_marking_num; - uint32_t l0_picture_structure; - uint32_t l0_enc_pic_type; - uint32_t l0_frame_number; - uint32_t l0_picture_order_count; - uint32_t l0_luma_offset; - uint32_t l0_chroma_offset; - uint32_t l1_picture_structure; - uint32_t l1_enc_pic_type; - uint32_t l1_frame_number; - uint32_t l1_picture_order_count; - uint32_t l1_luma_offset; - uint32_t l1_chroma_offset; - uint32_t enc_reconstructed_luma_offset; - uint32_t enc_reconstructed_chroma_offset;; - uint32_t enc_coloc_buffer_offset; - uint32_t enc_reconstructed_ref_base_picture_luma_offset; - uint32_t enc_reconstructed_ref_base_picture_chroma_offset; - uint32_t enc_reference_ref_base_picture_luma_offset; - uint32_t enc_reference_ref_base_picture_chroma_offset; - uint32_t picture_count; - uint32_t frame_number; - uint32_t picture_order_count; - uint32_t num_i_pic_remain_in_rcgop; - uint32_t num_p_pic_remain_in_rcgop; - uint32_t num_b_pic_remain_in_rcgop; - uint32_t num_ir_pic_remain_in_rcgop; - uint32_t enable_intra_refresh; - uint32_t aq_variance_en; - uint32_t aq_block_size; - uint32_t aq_mb_variance_sel; - uint32_t aq_frame_variance_sel; - uint32_t aq_param_a; - uint32_t aq_param_b; - uint32_t aq_param_c; - uint32_t aq_param_d; - uint32_t aq_param_e; - uint32_t context_in_sfb; + uint32_t insert_headers; + uint32_t picture_structure; + uint32_t allowed_max_bitstream_size; + uint32_t force_refresh_map; + uint32_t insert_aud; + uint32_t end_of_sequence; + uint32_t end_of_stream; + uint32_t input_picture_luma_address_hi; + uint32_t input_picture_luma_address_lo; + uint32_t input_picture_chroma_address_hi; + uint32_t input_picture_chroma_address_lo; + uint32_t enc_input_frame_y_pitch; + uint32_t enc_input_pic_luma_pitch; + uint32_t enc_input_pic_chroma_pitch; + ; + uint32_t enc_input_pic_addr_array; + uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload; + uint32_t enc_input_pic_tile_config; + uint32_t enc_pic_type; + uint32_t enc_idr_flag; + uint32_t enc_idr_pic_id; + uint32_t enc_mgs_key_pic; + uint32_t enc_reference_flag; + uint32_t enc_temporal_layer_index; + uint32_t num_ref_idx_active_override_flag; + uint32_t num_ref_idx_l0_active_minus1; + uint32_t num_ref_idx_l1_active_minus1; + uint32_t enc_ref_list_modification_op; + uint32_t enc_ref_list_modification_num; + uint32_t enc_decoded_picture_marking_op; + uint32_t enc_decoded_picture_marking_num; + uint32_t enc_decoded_picture_marking_idx; + uint32_t enc_decoded_ref_base_picture_marking_op; + uint32_t enc_decoded_ref_base_picture_marking_num; + uint32_t l0_picture_structure; + uint32_t l0_enc_pic_type; + uint32_t l0_frame_number; + uint32_t l0_picture_order_count; + uint32_t l0_luma_offset; + uint32_t l0_chroma_offset; + uint32_t l1_picture_structure; + uint32_t l1_enc_pic_type; + uint32_t l1_frame_number; + uint32_t l1_picture_order_count; + uint32_t l1_luma_offset; + uint32_t l1_chroma_offset; + uint32_t enc_reconstructed_luma_offset; + uint32_t enc_reconstructed_chroma_offset; + ; + uint32_t enc_coloc_buffer_offset; + uint32_t enc_reconstructed_ref_base_picture_luma_offset; + uint32_t enc_reconstructed_ref_base_picture_chroma_offset; + uint32_t enc_reference_ref_base_picture_luma_offset; + uint32_t enc_reference_ref_base_picture_chroma_offset; + uint32_t picture_count; + uint32_t frame_number; + uint32_t picture_order_count; + uint32_t num_i_pic_remain_in_rcgop; + uint32_t num_p_pic_remain_in_rcgop; + uint32_t num_b_pic_remain_in_rcgop; + uint32_t num_ir_pic_remain_in_rcgop; + uint32_t enable_intra_refresh; + uint32_t aq_variance_en; + uint32_t aq_block_size; + uint32_t aq_mb_variance_sel; + uint32_t aq_frame_variance_sel; + uint32_t aq_param_a; + uint32_t aq_param_b; + uint32_t aq_param_c; + uint32_t aq_param_d; + uint32_t aq_param_e; + uint32_t context_in_sfb; }; struct rvce_enc_create { - uint32_t enc_use_circular_buffer; - uint32_t enc_profile; - uint32_t enc_level; - uint32_t enc_pic_struct_restriction; - uint32_t enc_image_width; - uint32_t enc_image_height; - uint32_t enc_ref_pic_luma_pitch; - uint32_t enc_ref_pic_chroma_pitch; - uint32_t enc_ref_y_height_in_qw; - uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo; - uint32_t enc_pre_encode_context_buffer_offset; - uint32_t enc_pre_encode_input_luma_buffer_offset; - uint32_t enc_pre_encode_input_chroma_buffer_offset; - uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity; + uint32_t enc_use_circular_buffer; + uint32_t enc_profile; + uint32_t enc_level; + uint32_t enc_pic_struct_restriction; + uint32_t enc_image_width; + uint32_t enc_image_height; + uint32_t enc_ref_pic_luma_pitch; + uint32_t enc_ref_pic_chroma_pitch; + uint32_t enc_ref_y_height_in_qw; + uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo; + uint32_t enc_pre_encode_context_buffer_offset; + uint32_t enc_pre_encode_input_luma_buffer_offset; + uint32_t enc_pre_encode_input_chroma_buffer_offset; + uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity; }; struct rvce_config_ext { - uint32_t enc_enable_perf_logging; + uint32_t enc_enable_perf_logging; }; struct rvce_h264_enc_pic { - struct rvce_rate_control rc; - struct rvce_motion_estimation me; - struct rvce_pic_control pc; - struct rvce_task_info ti; - struct rvce_feedback_buf_pkg fb; - struct rvce_rdo rdo; - struct rvce_vui vui; - struct rvce_enc_operation eo; - struct rvce_enc_create ec; - struct rvce_config_ext ce; - - unsigned quant_i_frames; - unsigned quant_p_frames; - unsigned quant_b_frames; - - enum pipe_h264_enc_picture_type picture_type; - unsigned frame_num; - unsigned frame_num_cnt; - unsigned p_remain; - unsigned i_remain; - unsigned idr_pic_id; - unsigned gop_cnt; - unsigned gop_size; - unsigned pic_order_cnt; - unsigned ref_idx_l0; - unsigned ref_idx_l1; - unsigned addrmode_arraymode_disrdo_distwoinstants; - - bool not_referenced; - bool is_idr; - bool has_ref_pic_list; - bool enable_vui; - unsigned int ref_pic_list_0[32]; - unsigned int ref_pic_list_1[32]; - unsigned int frame_idx[32]; + struct rvce_rate_control rc; + struct rvce_motion_estimation me; + struct rvce_pic_control pc; + struct rvce_task_info ti; + struct rvce_feedback_buf_pkg fb; + struct rvce_rdo rdo; + struct rvce_vui vui; + struct rvce_enc_operation eo; + struct rvce_enc_create ec; + struct rvce_config_ext ce; + + unsigned quant_i_frames; + unsigned quant_p_frames; + unsigned quant_b_frames; + + enum pipe_h264_enc_picture_type picture_type; + unsigned frame_num; + unsigned frame_num_cnt; + unsigned p_remain; + unsigned i_remain; + unsigned idr_pic_id; + unsigned gop_cnt; + unsigned gop_size; + unsigned pic_order_cnt; + unsigned ref_idx_l0; + unsigned ref_idx_l1; + unsigned addrmode_arraymode_disrdo_distwoinstants; + + bool not_referenced; + bool is_idr; + bool has_ref_pic_list; + bool enable_vui; + unsigned int ref_pic_list_0[32]; + unsigned int ref_pic_list_1[32]; + unsigned int frame_idx[32]; }; /* VCE encoder representation */ struct rvce_encoder { - struct pipe_video_codec base; - - /* version specific packets */ - void (*session)(struct rvce_encoder *enc); - void (*create)(struct rvce_encoder *enc); - void (*feedback)(struct rvce_encoder *enc); - void (*rate_control)(struct rvce_encoder *enc); - void (*config_extension)(struct rvce_encoder *enc); - void (*pic_control)(struct rvce_encoder *enc); - void (*motion_estimation)(struct rvce_encoder *enc); - void (*rdo)(struct rvce_encoder *enc); - void (*vui)(struct rvce_encoder *enc); - void (*config)(struct rvce_encoder *enc); - void (*encode)(struct rvce_encoder *enc); - void (*destroy)(struct rvce_encoder *enc); - void (*task_info)(struct rvce_encoder *enc, uint32_t op, - uint32_t dep, uint32_t fb_idx, - uint32_t ring_idx); - void (*si_get_pic_param)(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); - - unsigned stream_handle; - - struct pipe_screen *screen; - struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; - - rvce_get_buffer get_buffer; - - struct pb_buffer* handle; - struct radeon_surf* luma; - struct radeon_surf* chroma; - - struct pb_buffer* bs_handle; - unsigned bs_size; - - struct rvce_cpb_slot *cpb_array; - struct list_head cpb_slots; - unsigned cpb_num; - - struct rvid_buffer *fb; - struct rvid_buffer cpb; - struct pipe_h264_enc_picture_desc pic; - struct rvce_h264_enc_pic enc_pic; - - unsigned task_info_idx; - unsigned bs_idx; - - bool use_vm; - bool use_vui; - bool dual_pipe; - bool dual_inst; + struct pipe_video_codec base; + + /* version specific packets */ + void (*session)(struct rvce_encoder *enc); + void (*create)(struct rvce_encoder *enc); + void (*feedback)(struct rvce_encoder *enc); + void (*rate_control)(struct rvce_encoder *enc); + void (*config_extension)(struct rvce_encoder *enc); + void (*pic_control)(struct rvce_encoder *enc); + void (*motion_estimation)(struct rvce_encoder *enc); + void (*rdo)(struct rvce_encoder *enc); + void (*vui)(struct rvce_encoder *enc); + void (*config)(struct rvce_encoder *enc); + void (*encode)(struct rvce_encoder *enc); + void (*destroy)(struct rvce_encoder *enc); + void (*task_info)(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx); + void (*si_get_pic_param)(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic); + + unsigned stream_handle; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + rvce_get_buffer get_buffer; + + struct pb_buffer *handle; + struct radeon_surf *luma; + struct radeon_surf *chroma; + + struct pb_buffer *bs_handle; + unsigned bs_size; + + struct rvce_cpb_slot *cpb_array; + struct list_head cpb_slots; + unsigned cpb_num; + + struct rvid_buffer *fb; + struct rvid_buffer cpb; + struct pipe_h264_enc_picture_desc pic; + struct rvce_h264_enc_pic enc_pic; + + unsigned task_info_idx; + unsigned bs_idx; + + bool use_vm; + bool use_vui; + bool dual_pipe; + bool dual_inst; }; /* CPB handling functions */ struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc); struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc); struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc); -void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, - signed *luma_offset, signed *chroma_offset); +void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, + signed *chroma_offset); struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templat, - struct radeon_winsys* ws, - rvce_get_buffer get_buffer); + const struct pipe_video_codec *templat, + struct radeon_winsys *ws, + rvce_get_buffer get_buffer); bool si_vce_is_fw_version_supported(struct si_screen *sscreen); -void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset); +void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, + enum radeon_bo_domain domain, signed offset); /* init vce fw 40.2.2 specific callbacks */ void si_vce_40_2_2_init(struct rvce_encoder *enc); @@ -441,15 +445,12 @@ void si_vce_50_init(struct rvce_encoder *enc); void si_vce_52_init(struct rvce_encoder *enc); /* get parameters for vce 40.2.2 */ -void si_vce_40_2_2_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic); /* get parameters for vce 50 */ -void si_vce_50_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic); /* get parameters for vce 52 */ -void si_vce_52_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic); #endif diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index f2d11d49c6f..3653540eaba 100644 --- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -25,403 +25,400 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void session(struct rvce_encoder *enc) { - RVCE_BEGIN(0x00000001); // session cmd - RVCE_CS(enc->stream_handle); - RVCE_END(); + RVCE_BEGIN(0x00000001); // session cmd + RVCE_CS(enc->stream_handle); + RVCE_END(); } -static void task_info(struct rvce_encoder *enc, uint32_t op, - uint32_t dep, uint32_t fb_idx, uint32_t ring_idx) +static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx) { - RVCE_BEGIN(0x00000002); // task info - if (op == 0x3) { - if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; - // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; - } - enc->task_info_idx = enc->cs->current.cdw; - } - RVCE_CS(0xffffffff); // offsetOfNextTaskInfo - RVCE_CS(op); // taskOperation - RVCE_CS(dep); // referencePictureDependency - RVCE_CS(0x00000000); // collocateFlagDependency - RVCE_CS(fb_idx); // feedbackIndex - RVCE_CS(ring_idx); // videoBitstreamRingIndex - RVCE_END(); + RVCE_BEGIN(0x00000002); // task info + if (op == 0x3) { + if (enc->task_info_idx) { + uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + // Update offsetOfNextTaskInfo + enc->cs->current.buf[enc->task_info_idx] = offs; + } + enc->task_info_idx = enc->cs->current.cdw; + } + RVCE_CS(0xffffffff); // offsetOfNextTaskInfo + RVCE_CS(op); // taskOperation + RVCE_CS(dep); // referencePictureDependency + RVCE_CS(0x00000000); // collocateFlagDependency + RVCE_CS(fb_idx); // feedbackIndex + RVCE_CS(ring_idx); // videoBitstreamRingIndex + RVCE_END(); } static void feedback(struct rvce_encoder *enc) { - RVCE_BEGIN(0x05000005); // feedback buffer - RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo - RVCE_CS(0x00000001); // feedbackRingSize - RVCE_END(); + RVCE_BEGIN(0x05000005); // feedback buffer + RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo + RVCE_CS(0x00000001); // feedbackRingSize + RVCE_END(); } static void create(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000000, 0, 0, 0); - - RVCE_BEGIN(0x01000001); // create cmd - RVCE_CS(0x00000000); // encUseCircularBuffer - RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile - RVCE_CS(enc->base.level); // encLevel - RVCE_CS(0x00000000); // encPicStructRestriction - RVCE_CS(enc->base.width); // encImageWidth - RVCE_CS(enc->base.height); // encImageHeight - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw - RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO - RVCE_END(); + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(0x00000000); // encUseCircularBuffer + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(0x00000000); // encPicStructRestriction + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw + RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO + RVCE_END(); } static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod - RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate - RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum - RVCE_CS(0x00000000); // encGOPSize - RVCE_CS(enc->pic.quant_i_frames); // encQP_I - RVCE_CS(enc->pic.quant_p_frames); // encQP_P - RVCE_CS(enc->pic.quant_b_frames); // encQP_B - RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen - RVCE_CS(0x00000000); // encVBVBufferLevel - RVCE_CS(0x00000000); // encMaxAUSize - RVCE_CS(0x00000000); // encQPInitialMode - RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional - RVCE_CS(0x00000000); // encMinQP - RVCE_CS(0x00000033); // encMaxQP - RVCE_CS(0x00000000); // encSkipFrameEnable - RVCE_CS(0x00000000); // encFillerDataEnable - RVCE_CS(0x00000000); // encEnforceHRD - RVCE_CS(0x00000000); // encBPicsDeltaQP - RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP - RVCE_CS(0x00000000); // encRateControlReInitDisable - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod + RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate + RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum + RVCE_CS(0x00000000); // encGOPSize + RVCE_CS(enc->pic.quant_i_frames); // encQP_I + RVCE_CS(enc->pic.quant_p_frames); // encQP_P + RVCE_CS(enc->pic.quant_b_frames); // encQP_B + RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen + RVCE_CS(0x00000000); // encVBVBufferLevel + RVCE_CS(0x00000000); // encMaxAUSize + RVCE_CS(0x00000000); // encQPInitialMode + RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional + RVCE_CS(0x00000000); // encMinQP + RVCE_CS(0x00000033); // encMaxQP + RVCE_CS(0x00000000); // encSkipFrameEnable + RVCE_CS(0x00000000); // encFillerDataEnable + RVCE_CS(0x00000000); // encEnforceHRD + RVCE_CS(0x00000000); // encBPicsDeltaQP + RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP + RVCE_CS(0x00000000); // encRateControlReInitDisable + RVCE_END(); } static void config_extension(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000001); // config extension - RVCE_CS(0x00000003); // encEnablePerfLogging - RVCE_END(); + RVCE_BEGIN(0x04000001); // config extension + RVCE_CS(0x00000003); // encEnablePerfLogging + RVCE_END(); } static void pic_control(struct rvce_encoder *enc) { - unsigned encNumMBsPerSlice; - - encNumMBsPerSlice = align(enc->base.width, 16) / 16; - encNumMBsPerSlice *= align(enc->base.height, 16) / 16; - - RVCE_BEGIN(0x04000002); // pic control - RVCE_CS(0x00000000); // encUseConstrainedIntraPred - RVCE_CS(0x00000000); // encCABACEnable - RVCE_CS(0x00000000); // encCABACIDC - RVCE_CS(0x00000000); // encLoopFilterDisable - RVCE_CS(0x00000000); // encLFBetaOffset - RVCE_CS(0x00000000); // encLFAlphaC0Offset - RVCE_CS(0x00000000); // encCropLeftOffset - RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset - RVCE_CS(0x00000000); // encCropTopOffset - RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset - RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice - RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot - RVCE_CS(0x00000000); // encForceIntraRefresh - RVCE_CS(0x00000000); // encForceIMBPeriod - RVCE_CS(0x00000000); // encPicOrderCntType - RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4 - RVCE_CS(0x00000000); // encSPSID - RVCE_CS(0x00000000); // encPPSID - RVCE_CS(0x00000040); // encConstraintSetFlags - RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern - RVCE_CS(0x00000000); // weightPredModeBPicture - RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames - RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames - RVCE_CS(0x00000001); // encNumDefaultActiveRefL0 - RVCE_CS(0x00000001); // encNumDefaultActiveRefL1 - RVCE_CS(0x00000000); // encSliceMode - RVCE_CS(0x00000000); // encMaxSliceSize - RVCE_END(); + unsigned encNumMBsPerSlice; + + encNumMBsPerSlice = align(enc->base.width, 16) / 16; + encNumMBsPerSlice *= align(enc->base.height, 16) / 16; + + RVCE_BEGIN(0x04000002); // pic control + RVCE_CS(0x00000000); // encUseConstrainedIntraPred + RVCE_CS(0x00000000); // encCABACEnable + RVCE_CS(0x00000000); // encCABACIDC + RVCE_CS(0x00000000); // encLoopFilterDisable + RVCE_CS(0x00000000); // encLFBetaOffset + RVCE_CS(0x00000000); // encLFAlphaC0Offset + RVCE_CS(0x00000000); // encCropLeftOffset + RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset + RVCE_CS(0x00000000); // encCropTopOffset + RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset + RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice + RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot + RVCE_CS(0x00000000); // encForceIntraRefresh + RVCE_CS(0x00000000); // encForceIMBPeriod + RVCE_CS(0x00000000); // encPicOrderCntType + RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4 + RVCE_CS(0x00000000); // encSPSID + RVCE_CS(0x00000000); // encPPSID + RVCE_CS(0x00000040); // encConstraintSetFlags + RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern + RVCE_CS(0x00000000); // weightPredModeBPicture + RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames + RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames + RVCE_CS(0x00000001); // encNumDefaultActiveRefL0 + RVCE_CS(0x00000001); // encNumDefaultActiveRefL1 + RVCE_CS(0x00000000); // encSliceMode + RVCE_CS(0x00000000); // encMaxSliceSize + RVCE_END(); } static void motion_estimation(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000007); // motion estimation - RVCE_CS(0x00000001); // encIMEDecimationSearch - RVCE_CS(0x00000001); // motionEstHalfPixel - RVCE_CS(0x00000000); // motionEstQuarterPixel - RVCE_CS(0x00000000); // disableFavorPMVPoint - RVCE_CS(0x00000000); // forceZeroPointCenter - RVCE_CS(0x00000000); // LSMVert - RVCE_CS(0x00000010); // encSearchRangeX - RVCE_CS(0x00000010); // encSearchRangeY - RVCE_CS(0x00000010); // encSearch1RangeX - RVCE_CS(0x00000010); // encSearch1RangeY - RVCE_CS(0x00000000); // disable16x16Frame1 - RVCE_CS(0x00000000); // disableSATD - RVCE_CS(0x00000000); // enableAMD - RVCE_CS(0x000000fe); // encDisableSubMode - RVCE_CS(0x00000000); // encIMESkipX - RVCE_CS(0x00000000); // encIMESkipY - RVCE_CS(0x00000000); // encEnImeOverwDisSubm - RVCE_CS(0x00000000); // encImeOverwDisSubmNo - RVCE_CS(0x00000001); // encIME2SearchRangeX - RVCE_CS(0x00000001); // encIME2SearchRangeY - RVCE_CS(0x00000000); // parallelModeSpeedupEnable - RVCE_CS(0x00000000); // fme0_encDisableSubMode - RVCE_CS(0x00000000); // fme1_encDisableSubMode - RVCE_CS(0x00000000); // imeSWSpeedupEnable - RVCE_END(); + RVCE_BEGIN(0x04000007); // motion estimation + RVCE_CS(0x00000001); // encIMEDecimationSearch + RVCE_CS(0x00000001); // motionEstHalfPixel + RVCE_CS(0x00000000); // motionEstQuarterPixel + RVCE_CS(0x00000000); // disableFavorPMVPoint + RVCE_CS(0x00000000); // forceZeroPointCenter + RVCE_CS(0x00000000); // LSMVert + RVCE_CS(0x00000010); // encSearchRangeX + RVCE_CS(0x00000010); // encSearchRangeY + RVCE_CS(0x00000010); // encSearch1RangeX + RVCE_CS(0x00000010); // encSearch1RangeY + RVCE_CS(0x00000000); // disable16x16Frame1 + RVCE_CS(0x00000000); // disableSATD + RVCE_CS(0x00000000); // enableAMD + RVCE_CS(0x000000fe); // encDisableSubMode + RVCE_CS(0x00000000); // encIMESkipX + RVCE_CS(0x00000000); // encIMESkipY + RVCE_CS(0x00000000); // encEnImeOverwDisSubm + RVCE_CS(0x00000000); // encImeOverwDisSubmNo + RVCE_CS(0x00000001); // encIME2SearchRangeX + RVCE_CS(0x00000001); // encIME2SearchRangeY + RVCE_CS(0x00000000); // parallelModeSpeedupEnable + RVCE_CS(0x00000000); // fme0_encDisableSubMode + RVCE_CS(0x00000000); // fme1_encDisableSubMode + RVCE_CS(0x00000000); // imeSWSpeedupEnable + RVCE_END(); } static void rdo(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000008); // rdo - RVCE_CS(0x00000000); // encDisableTbePredIFrame - RVCE_CS(0x00000000); // encDisableTbePredPFrame - RVCE_CS(0x00000000); // useFmeInterpolY - RVCE_CS(0x00000000); // useFmeInterpolUV - RVCE_CS(0x00000000); // useFmeIntrapolY - RVCE_CS(0x00000000); // useFmeIntrapolUV - RVCE_CS(0x00000000); // useFmeInterpolY_1 - RVCE_CS(0x00000000); // useFmeInterpolUV_1 - RVCE_CS(0x00000000); // useFmeIntrapolY_1 - RVCE_CS(0x00000000); // useFmeIntrapolUV_1 - RVCE_CS(0x00000000); // enc16x16CostAdj - RVCE_CS(0x00000000); // encSkipCostAdj - RVCE_CS(0x00000000); // encForce16x16skip - RVCE_CS(0x00000000); // encDisableThresholdCalcA - RVCE_CS(0x00000000); // encLumaCoeffCost - RVCE_CS(0x00000000); // encLumaMBCoeffCost - RVCE_CS(0x00000000); // encChromaCoeffCost - RVCE_END(); + RVCE_BEGIN(0x04000008); // rdo + RVCE_CS(0x00000000); // encDisableTbePredIFrame + RVCE_CS(0x00000000); // encDisableTbePredPFrame + RVCE_CS(0x00000000); // useFmeInterpolY + RVCE_CS(0x00000000); // useFmeInterpolUV + RVCE_CS(0x00000000); // useFmeIntrapolY + RVCE_CS(0x00000000); // useFmeIntrapolUV + RVCE_CS(0x00000000); // useFmeInterpolY_1 + RVCE_CS(0x00000000); // useFmeInterpolUV_1 + RVCE_CS(0x00000000); // useFmeIntrapolY_1 + RVCE_CS(0x00000000); // useFmeIntrapolUV_1 + RVCE_CS(0x00000000); // enc16x16CostAdj + RVCE_CS(0x00000000); // encSkipCostAdj + RVCE_CS(0x00000000); // encForce16x16skip + RVCE_CS(0x00000000); // encDisableThresholdCalcA + RVCE_CS(0x00000000); // encLumaCoeffCost + RVCE_CS(0x00000000); // encLumaMBCoeffCost + RVCE_CS(0x00000000); // encChromaCoeffCost + RVCE_END(); } static void vui(struct rvce_encoder *enc) { - int i; - - if (!enc->pic.rate_ctrl.frame_rate_num) - return; - - RVCE_BEGIN(0x04000009); // vui - RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag - RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc - RVCE_CS(0x00000000); //aspectRatioInfo.sarWidth - RVCE_CS(0x00000000); //aspectRatioInfo.sarHeight - RVCE_CS(0x00000000); //overscanInfoPresentFlag - RVCE_CS(0x00000000); //overScanInfo.overscanAppropFlag - RVCE_CS(0x00000000); //videoSignalTypePresentFlag - RVCE_CS(0x00000005); //videoSignalTypeInfo.videoFormat - RVCE_CS(0x00000000); //videoSignalTypeInfo.videoFullRangeFlag - RVCE_CS(0x00000000); //videoSignalTypeInfo.colorDescriptionPresentFlag - RVCE_CS(0x00000002); //videoSignalTypeInfo.colorPrim - RVCE_CS(0x00000002); //videoSignalTypeInfo.transferChar - RVCE_CS(0x00000002); //videoSignalTypeInfo.matrixCoef - RVCE_CS(0x00000000); //chromaLocInfoPresentFlag - RVCE_CS(0x00000000); //chromaLocInfo.chromaLocTop - RVCE_CS(0x00000000); //chromaLocInfo.chromaLocBottom - RVCE_CS(0x00000001); //timingInfoPresentFlag - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); //timingInfo.numUnitsInTick - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); //timingInfo.timeScale; - RVCE_CS(0x00000001); //timingInfo.fixedFrameRateFlag - RVCE_CS(0x00000000); //nalHRDParametersPresentFlag - RVCE_CS(0x00000000); //hrdParam.cpbCntMinus1 - RVCE_CS(0x00000004); //hrdParam.bitRateScale - RVCE_CS(0x00000006); //hrdParam.cpbSizeScale - for (i = 0; i < 32; i++) { - RVCE_CS(0x00000000); //hrdParam.bitRateValueMinus - RVCE_CS(0x00000000); //hrdParam.cpbSizeValueMinus - RVCE_CS(0x00000000); //hrdParam.cbrFlag - } - RVCE_CS(0x00000017); //hrdParam.initialCpbRemovalDelayLengthMinus1 - RVCE_CS(0x00000017); //hrdParam.cpbRemovalDelayLengthMinus1 - RVCE_CS(0x00000017); //hrdParam.dpbOutputDelayLengthMinus1 - RVCE_CS(0x00000018); //hrdParam.timeOffsetLength - RVCE_CS(0x00000000); //lowDelayHRDFlag - RVCE_CS(0x00000000); //picStructPresentFlag - RVCE_CS(0x00000000); //bitstreamRestrictionPresentFlag - RVCE_CS(0x00000001); //bitstreamRestrictions.motionVectorsOverPicBoundariesFlag - RVCE_CS(0x00000002); //bitstreamRestrictions.maxBytesPerPicDenom - RVCE_CS(0x00000001); //bitstreamRestrictions.maxBitsPerMbDenom - RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthHori - RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthVert - RVCE_CS(0x00000003); //bitstreamRestrictions.numReorderFrames - RVCE_CS(0x00000003); //bitstreamRestrictions.maxDecFrameBuffering - RVCE_END(); + int i; + + if (!enc->pic.rate_ctrl.frame_rate_num) + return; + + RVCE_BEGIN(0x04000009); // vui + RVCE_CS(0x00000000); // aspectRatioInfoPresentFlag + RVCE_CS(0x00000000); // aspectRatioInfo.aspectRatioIdc + RVCE_CS(0x00000000); // aspectRatioInfo.sarWidth + RVCE_CS(0x00000000); // aspectRatioInfo.sarHeight + RVCE_CS(0x00000000); // overscanInfoPresentFlag + RVCE_CS(0x00000000); // overScanInfo.overscanAppropFlag + RVCE_CS(0x00000000); // videoSignalTypePresentFlag + RVCE_CS(0x00000005); // videoSignalTypeInfo.videoFormat + RVCE_CS(0x00000000); // videoSignalTypeInfo.videoFullRangeFlag + RVCE_CS(0x00000000); // videoSignalTypeInfo.colorDescriptionPresentFlag + RVCE_CS(0x00000002); // videoSignalTypeInfo.colorPrim + RVCE_CS(0x00000002); // videoSignalTypeInfo.transferChar + RVCE_CS(0x00000002); // videoSignalTypeInfo.matrixCoef + RVCE_CS(0x00000000); // chromaLocInfoPresentFlag + RVCE_CS(0x00000000); // chromaLocInfo.chromaLocTop + RVCE_CS(0x00000000); // chromaLocInfo.chromaLocBottom + RVCE_CS(0x00000001); // timingInfoPresentFlag + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // timingInfo.numUnitsInTick + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); // timingInfo.timeScale; + RVCE_CS(0x00000001); // timingInfo.fixedFrameRateFlag + RVCE_CS(0x00000000); // nalHRDParametersPresentFlag + RVCE_CS(0x00000000); // hrdParam.cpbCntMinus1 + RVCE_CS(0x00000004); // hrdParam.bitRateScale + RVCE_CS(0x00000006); // hrdParam.cpbSizeScale + for (i = 0; i < 32; i++) { + RVCE_CS(0x00000000); // hrdParam.bitRateValueMinus + RVCE_CS(0x00000000); // hrdParam.cpbSizeValueMinus + RVCE_CS(0x00000000); // hrdParam.cbrFlag + } + RVCE_CS(0x00000017); // hrdParam.initialCpbRemovalDelayLengthMinus1 + RVCE_CS(0x00000017); // hrdParam.cpbRemovalDelayLengthMinus1 + RVCE_CS(0x00000017); // hrdParam.dpbOutputDelayLengthMinus1 + RVCE_CS(0x00000018); // hrdParam.timeOffsetLength + RVCE_CS(0x00000000); // lowDelayHRDFlag + RVCE_CS(0x00000000); // picStructPresentFlag + RVCE_CS(0x00000000); // bitstreamRestrictionPresentFlag + RVCE_CS(0x00000001); // bitstreamRestrictions.motionVectorsOverPicBoundariesFlag + RVCE_CS(0x00000002); // bitstreamRestrictions.maxBytesPerPicDenom + RVCE_CS(0x00000001); // bitstreamRestrictions.maxBitsPerMbDenom + RVCE_CS(0x00000010); // bitstreamRestrictions.log2MaxMvLengthHori + RVCE_CS(0x00000010); // bitstreamRestrictions.log2MaxMvLengthVert + RVCE_CS(0x00000003); // bitstreamRestrictions.numReorderFrames + RVCE_CS(0x00000003); // bitstreamRestrictions.maxDecFrameBuffering + RVCE_END(); } static void config(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); - enc->rate_control(enc); - enc->config_extension(enc); - enc->motion_estimation(enc); - enc->rdo(enc); - if (enc->use_vui) - enc->vui(enc); - enc->pic_control(enc); + enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); + enc->rate_control(enc); + enc->config_extension(enc); + enc->motion_estimation(enc); + enc->rdo(enc); + if (enc->use_vui) + enc->vui(enc); + enc->pic_control(enc); } static void encode(struct rvce_encoder *enc) { - signed luma_offset, chroma_offset; - int i; - - enc->task_info(enc, 0x00000003, 0, 0, 0); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo - RVCE_END(); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(0x00000000); // insertHeaders - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(0x00000000); // forceRefreshMap - RVCE_CS(0x00000000); // insertAUD - RVCE_CS(0x00000000); // endOfSequence - RVCE_CS(0x00000000); // endOfStream - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode - RVCE_CS(0x00000000); // encInputPicTileConfig - RVCE_CS(enc->pic.picture_type); // encPicType - RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - RVCE_CS(0x00000000); // encIdrPicId - RVCE_CS(0x00000000); // encMGSKeyPic - RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag - RVCE_CS(0x00000000); // encTemporalLayerIndex - RVCE_CS(0x00000000); // num_ref_idx_active_override_flag - RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 - RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 - - i = enc->pic.frame_num - enc->pic.ref_idx_l0; - if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - RVCE_CS(0x00000001); // encRefListModificationOp - RVCE_CS(i - 1); // encRefListModificationNum - } else { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - - for (i = 0; i < 3; ++i) { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - for (i = 0; i < 4; ++i) { - RVCE_CS(0x00000000); // encDecodedPictureMarkingOp - RVCE_CS(0x00000000); // encDecodedPictureMarkingNum - RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); // encPicType - RVCE_CS(l0->frame_num); // frameNumber - RVCE_CS(l0->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - // encReferencePictureL0[1] - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); // encPicType - RVCE_CS(l1->frame_num); // frameNumber - RVCE_CS(l1->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); // encReconstructedLumaOffset - RVCE_CS(chroma_offset); // encReconstructedChromaOffset - RVCE_CS(0x00000000); // encColocBufferOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset - RVCE_CS(0x00000000); // pictureCount - RVCE_CS(enc->pic.frame_num); // frameNumber - RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount - RVCE_CS(0x00000000); // numIPicRemainInRCGOP - RVCE_CS(0x00000000); // numPPicRemainInRCGOP - RVCE_CS(0x00000000); // numBPicRemainInRCGOP - RVCE_CS(0x00000000); // numIRPicRemainInRCGOP - RVCE_CS(0x00000000); // enableIntraRefresh - RVCE_END(); + signed luma_offset, chroma_offset; + int i; + + enc->task_info(enc, 0x00000003, 0, 0, 0); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo + RVCE_END(); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(0x00000000); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + RVCE_END(); } static void destroy(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000001, 0, 0, 0); + enc->task_info(enc, 0x00000001, 0, 0, 0); - feedback(enc); + feedback(enc); - RVCE_BEGIN(0x02000001); // destroy - RVCE_END(); + RVCE_BEGIN(0x02000001); // destroy + RVCE_END(); } void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) @@ -430,18 +427,18 @@ void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_pict void si_vce_40_2_2_init(struct rvce_encoder *enc) { - enc->session = session; - enc->task_info = task_info; - enc->create = create; - enc->feedback = feedback; - enc->rate_control = rate_control; - enc->config_extension = config_extension; - enc->pic_control = pic_control; - enc->motion_estimation = motion_estimation; - enc->rdo = rdo; - enc->vui = vui; - enc->config = config; - enc->encode = encode; - enc->destroy = destroy; - enc->si_get_pic_param = si_vce_40_2_2_get_param; + enc->session = session; + enc->task_info = task_info; + enc->create = create; + enc->feedback = feedback; + enc->rate_control = rate_control; + enc->config_extension = config_extension; + enc->pic_control = pic_control; + enc->motion_estimation = motion_estimation; + enc->rdo = rdo; + enc->vui = vui; + enc->config = config; + enc->encode = encode; + enc->destroy = destroy; + enc->si_get_pic_param = si_vce_40_2_2_get_param; } diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c b/src/gallium/drivers/radeon/radeon_vce_50.c index e480ecb749f..9f972eef600 100644 --- a/src/gallium/drivers/radeon/radeon_vce_50.c +++ b/src/gallium/drivers/radeon/radeon_vce_50.c @@ -25,206 +25,203 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod - RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate - RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum - RVCE_CS(0x00000000); // encGOPSize - RVCE_CS(enc->pic.quant_i_frames); // encQP_I - RVCE_CS(enc->pic.quant_p_frames); // encQP_P - RVCE_CS(enc->pic.quant_b_frames); // encQP_B - RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen - RVCE_CS(0x00000000); // encVBVBufferLevel - RVCE_CS(0x00000000); // encMaxAUSize - RVCE_CS(0x00000000); // encQPInitialMode - RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional - RVCE_CS(0x00000000); // encMinQP - RVCE_CS(0x00000033); // encMaxQP - RVCE_CS(0x00000000); // encSkipFrameEnable - RVCE_CS(0x00000000); // encFillerDataEnable - RVCE_CS(0x00000000); // encEnforceHRD - RVCE_CS(0x00000000); // encBPicsDeltaQP - RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP - RVCE_CS(0x00000000); // encRateControlReInitDisable - RVCE_CS(0x00000000); // encLCVBRInitQPFlag - RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod + RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate + RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum + RVCE_CS(0x00000000); // encGOPSize + RVCE_CS(enc->pic.quant_i_frames); // encQP_I + RVCE_CS(enc->pic.quant_p_frames); // encQP_P + RVCE_CS(enc->pic.quant_b_frames); // encQP_B + RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen + RVCE_CS(0x00000000); // encVBVBufferLevel + RVCE_CS(0x00000000); // encMaxAUSize + RVCE_CS(0x00000000); // encQPInitialMode + RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional + RVCE_CS(0x00000000); // encMinQP + RVCE_CS(0x00000033); // encMaxQP + RVCE_CS(0x00000000); // encSkipFrameEnable + RVCE_CS(0x00000000); // encFillerDataEnable + RVCE_CS(0x00000000); // encEnforceHRD + RVCE_CS(0x00000000); // encBPicsDeltaQP + RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP + RVCE_CS(0x00000000); // encRateControlReInitDisable + RVCE_CS(0x00000000); // encLCVBRInitQPFlag + RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag + RVCE_END(); } static void encode(struct rvce_encoder *enc) { - signed luma_offset, chroma_offset, bs_offset; - unsigned dep, bs_idx = enc->bs_idx++; - int i; - - if (enc->dual_inst) { - if (bs_idx == 0) - dep = 1; - else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - dep = 0; - else - dep = 2; - } else - dep = 0; - - enc->task_info(enc, 0x00000003, dep, 0, bs_idx); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo - RVCE_END(); - - bs_offset = -(signed)(bs_idx * enc->bs_size); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - if (enc->dual_pipe) { - unsigned aux_offset = enc->cpb.res->buf->size - - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - RVCE_BEGIN(0x05000002); // auxiliary buffer - for (i = 0; i < 8; ++i) { - RVCE_CS(aux_offset); - aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; - } - for (i = 0; i < 8; ++i) - RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); - RVCE_END(); - } - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(0x00000000); // forceRefreshMap - RVCE_CS(0x00000000); // insertAUD - RVCE_CS(0x00000000); // endOfSequence - RVCE_CS(0x00000000); // endOfStream - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - if (enc->dual_pipe) - RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) - else - RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) - RVCE_CS(0x00000000); // encInputPicTileConfig - RVCE_CS(enc->pic.picture_type); // encPicType - RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - RVCE_CS(0x00000000); // encIdrPicId - RVCE_CS(0x00000000); // encMGSKeyPic - RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag - RVCE_CS(0x00000000); // encTemporalLayerIndex - RVCE_CS(0x00000000); // num_ref_idx_active_override_flag - RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 - RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 - - i = enc->pic.frame_num - enc->pic.ref_idx_l0; - if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - RVCE_CS(0x00000001); // encRefListModificationOp - RVCE_CS(i - 1); // encRefListModificationNum - } else { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - - for (i = 0; i < 3; ++i) { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - for (i = 0; i < 4; ++i) { - RVCE_CS(0x00000000); // encDecodedPictureMarkingOp - RVCE_CS(0x00000000); // encDecodedPictureMarkingNum - RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); // encPicType - RVCE_CS(l0->frame_num); // frameNumber - RVCE_CS(l0->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - // encReferencePictureL0[1] - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); // encPicType - RVCE_CS(l1->frame_num); // frameNumber - RVCE_CS(l1->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); // encReconstructedLumaOffset - RVCE_CS(chroma_offset); // encReconstructedChromaOffset - RVCE_CS(0x00000000); // encColocBufferOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset - RVCE_CS(0x00000000); // pictureCount - RVCE_CS(enc->pic.frame_num); // frameNumber - RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount - RVCE_CS(0x00000000); // numIPicRemainInRCGOP - RVCE_CS(0x00000000); // numPPicRemainInRCGOP - RVCE_CS(0x00000000); // numBPicRemainInRCGOP - RVCE_CS(0x00000000); // numIRPicRemainInRCGOP - RVCE_CS(0x00000000); // enableIntraRefresh - RVCE_END(); + signed luma_offset, chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = + enc->cpb.res->buf->size - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + if (enc->dual_pipe) + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + else + RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + RVCE_END(); } void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) @@ -233,10 +230,10 @@ void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_ void si_vce_50_init(struct rvce_encoder *enc) { - si_vce_40_2_2_init(enc); + si_vce_40_2_2_init(enc); - /* only the two below are different */ - enc->rate_control = rate_control; - enc->encode = encode; - enc->si_get_pic_param = si_vce_50_get_param; + /* only the two below are different */ + enc->rate_control = rate_control; + enc->encode = encode; + enc->si_get_pic_param = si_vce_50_get_param; } diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c index bb97cdfe5c3..141032d6617 100644 --- a/src/gallium/drivers/radeon/radeon_vce_52.c +++ b/src/gallium/drivers/radeon/radeon_vce_52.c @@ -25,647 +25,647 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void get_rate_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method; - enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate; - enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate; - enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames; - enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames; - enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames; - enc->enc_pic.rc.gop_size = pic->gop_size; - enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num; - enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den; - enc->enc_pic.rc.max_qp = 51; - enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; - enc->enc_pic.rc.vbv_buf_lv = pic->rate_ctrl.vbv_buf_lv; - enc->enc_pic.rc.fill_data_enable = pic->rate_ctrl.fill_data_enable; - enc->enc_pic.rc.enforce_hrd = pic->rate_ctrl.enforce_hrd; - enc->enc_pic.rc.target_bits_picture = pic->rate_ctrl.target_bits_picture; - enc->enc_pic.rc.peak_bits_picture_integer = pic->rate_ctrl.peak_bits_picture_integer; - enc->enc_pic.rc.peak_bits_picture_fraction = pic->rate_ctrl.peak_bits_picture_fraction; + enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method; + enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate; + enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate; + enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames; + enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames; + enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames; + enc->enc_pic.rc.gop_size = pic->gop_size; + enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num; + enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den; + enc->enc_pic.rc.max_qp = 51; + enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; + enc->enc_pic.rc.vbv_buf_lv = pic->rate_ctrl.vbv_buf_lv; + enc->enc_pic.rc.fill_data_enable = pic->rate_ctrl.fill_data_enable; + enc->enc_pic.rc.enforce_hrd = pic->rate_ctrl.enforce_hrd; + enc->enc_pic.rc.target_bits_picture = pic->rate_ctrl.target_bits_picture; + enc->enc_pic.rc.peak_bits_picture_integer = pic->rate_ctrl.peak_bits_picture_integer; + enc->enc_pic.rc.peak_bits_picture_fraction = pic->rate_ctrl.peak_bits_picture_fraction; } -static void get_motion_estimation_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) +static void get_motion_estimation_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.me.motion_est_quarter_pixel = pic->motion_est.motion_est_quarter_pixel; - enc->enc_pic.me.enc_disable_sub_mode = pic->motion_est.enc_disable_sub_mode; - enc->enc_pic.me.lsmvert = pic->motion_est.lsmvert; - enc->enc_pic.me.enc_en_ime_overw_dis_subm = pic->motion_est.enc_en_ime_overw_dis_subm; - enc->enc_pic.me.enc_ime_overw_dis_subm_no = pic->motion_est.enc_ime_overw_dis_subm_no; - enc->enc_pic.me.enc_ime2_search_range_x = pic->motion_est.enc_ime2_search_range_x; - enc->enc_pic.me.enc_ime2_search_range_y = pic->motion_est.enc_ime2_search_range_y; - enc->enc_pic.me.enc_ime_decimation_search = 0x00000001; - enc->enc_pic.me.motion_est_half_pixel = 0x00000001; - enc->enc_pic.me.enc_search_range_x = 0x00000010; - enc->enc_pic.me.enc_search_range_y = 0x00000010; - enc->enc_pic.me.enc_search1_range_x = 0x00000010; - enc->enc_pic.me.enc_search1_range_y = 0x00000010; + enc->enc_pic.me.motion_est_quarter_pixel = pic->motion_est.motion_est_quarter_pixel; + enc->enc_pic.me.enc_disable_sub_mode = pic->motion_est.enc_disable_sub_mode; + enc->enc_pic.me.lsmvert = pic->motion_est.lsmvert; + enc->enc_pic.me.enc_en_ime_overw_dis_subm = pic->motion_est.enc_en_ime_overw_dis_subm; + enc->enc_pic.me.enc_ime_overw_dis_subm_no = pic->motion_est.enc_ime_overw_dis_subm_no; + enc->enc_pic.me.enc_ime2_search_range_x = pic->motion_est.enc_ime2_search_range_x; + enc->enc_pic.me.enc_ime2_search_range_y = pic->motion_est.enc_ime2_search_range_y; + enc->enc_pic.me.enc_ime_decimation_search = 0x00000001; + enc->enc_pic.me.motion_est_half_pixel = 0x00000001; + enc->enc_pic.me.enc_search_range_x = 0x00000010; + enc->enc_pic.me.enc_search_range_y = 0x00000010; + enc->enc_pic.me.enc_search1_range_x = 0x00000010; + enc->enc_pic.me.enc_search1_range_y = 0x00000010; } static void get_pic_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - unsigned encNumMBsPerSlice; - encNumMBsPerSlice = align(enc->base.width, 16) / 16; - encNumMBsPerSlice *= align(enc->base.height, 16) / 16; - if (pic->pic_ctrl.enc_frame_cropping_flag) { - enc->enc_pic.pc.enc_crop_left_offset = pic->pic_ctrl.enc_frame_crop_left_offset; - enc->enc_pic.pc.enc_crop_right_offset = pic->pic_ctrl.enc_frame_crop_right_offset; - enc->enc_pic.pc.enc_crop_top_offset = pic->pic_ctrl.enc_frame_crop_top_offset; - enc->enc_pic.pc.enc_crop_bottom_offset = pic->pic_ctrl.enc_frame_crop_bottom_offset; - } else { - enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - enc->base.width) >> 1; - enc->enc_pic.pc.enc_crop_bottom_offset = (align(enc->base.height, 16) - enc->base.height) >> 1; - } - enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice; - enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 1; - enc->enc_pic.pc.enc_number_of_reference_frames = MIN2(enc->base.max_references, 2); - enc->enc_pic.pc.enc_max_num_ref_frames = enc->base.max_references + 1; - enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; - enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; - enc->enc_pic.pc.enc_cabac_enable = pic->pic_ctrl.enc_cabac_enable; - enc->enc_pic.pc.enc_constraint_set_flags = pic->pic_ctrl.enc_constraint_set_flags; - enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; - enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; + unsigned encNumMBsPerSlice; + encNumMBsPerSlice = align(enc->base.width, 16) / 16; + encNumMBsPerSlice *= align(enc->base.height, 16) / 16; + if (pic->pic_ctrl.enc_frame_cropping_flag) { + enc->enc_pic.pc.enc_crop_left_offset = pic->pic_ctrl.enc_frame_crop_left_offset; + enc->enc_pic.pc.enc_crop_right_offset = pic->pic_ctrl.enc_frame_crop_right_offset; + enc->enc_pic.pc.enc_crop_top_offset = pic->pic_ctrl.enc_frame_crop_top_offset; + enc->enc_pic.pc.enc_crop_bottom_offset = pic->pic_ctrl.enc_frame_crop_bottom_offset; + } else { + enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - enc->base.width) >> 1; + enc->enc_pic.pc.enc_crop_bottom_offset = + (align(enc->base.height, 16) - enc->base.height) >> 1; + } + enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice; + enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 1; + enc->enc_pic.pc.enc_number_of_reference_frames = MIN2(enc->base.max_references, 2); + enc->enc_pic.pc.enc_max_num_ref_frames = enc->base.max_references + 1; + enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; + enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; + enc->enc_pic.pc.enc_cabac_enable = pic->pic_ctrl.enc_cabac_enable; + enc->enc_pic.pc.enc_constraint_set_flags = pic->pic_ctrl.enc_constraint_set_flags; + enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; + enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; } static void get_task_info_param(struct rvce_encoder *enc) { - enc->enc_pic.ti.offset_of_next_task_info = 0xffffffff; + enc->enc_pic.ti.offset_of_next_task_info = 0xffffffff; } static void get_feedback_buffer_param(struct rvce_encoder *enc) { - enc->enc_pic.fb.feedback_ring_size = 0x00000001; + enc->enc_pic.fb.feedback_ring_size = 0x00000001; } static void get_config_ext_param(struct rvce_encoder *enc) { - enc->enc_pic.ce.enc_enable_perf_logging = 0x00000003; + enc->enc_pic.ce.enc_enable_perf_logging = 0x00000003; } static void get_vui_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.enable_vui = pic->enable_vui; - enc->enc_pic.vui.video_format = 0x00000005; - enc->enc_pic.vui.color_prim = 0x00000002; - enc->enc_pic.vui.transfer_char = 0x00000002; - enc->enc_pic.vui.matrix_coef = 0x00000002; - enc->enc_pic.vui.timing_info_present_flag = 0x00000001; - enc->enc_pic.vui.num_units_in_tick = pic->rate_ctrl.frame_rate_den; - enc->enc_pic.vui.time_scale = pic->rate_ctrl.frame_rate_num * 2; - enc->enc_pic.vui.fixed_frame_rate_flag = 0x00000001; - enc->enc_pic.vui.bit_rate_scale = 0x00000004; - enc->enc_pic.vui.cpb_size_scale = 0x00000006; - enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.cpb_removal_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.dpb_output_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.time_offset_length = 0x00000018; - enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag = 0x00000001; - enc->enc_pic.vui.max_bytes_per_pic_denom = 0x00000002; - enc->enc_pic.vui.max_bits_per_mb_denom = 0x00000001; - enc->enc_pic.vui.log2_max_mv_length_hori = 0x00000010; - enc->enc_pic.vui.log2_max_mv_length_vert = 0x00000010; - enc->enc_pic.vui.num_reorder_frames = 0x00000003; - enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003; + enc->enc_pic.enable_vui = pic->enable_vui; + enc->enc_pic.vui.video_format = 0x00000005; + enc->enc_pic.vui.color_prim = 0x00000002; + enc->enc_pic.vui.transfer_char = 0x00000002; + enc->enc_pic.vui.matrix_coef = 0x00000002; + enc->enc_pic.vui.timing_info_present_flag = 0x00000001; + enc->enc_pic.vui.num_units_in_tick = pic->rate_ctrl.frame_rate_den; + enc->enc_pic.vui.time_scale = pic->rate_ctrl.frame_rate_num * 2; + enc->enc_pic.vui.fixed_frame_rate_flag = 0x00000001; + enc->enc_pic.vui.bit_rate_scale = 0x00000004; + enc->enc_pic.vui.cpb_size_scale = 0x00000006; + enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.cpb_removal_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.dpb_output_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.time_offset_length = 0x00000018; + enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag = 0x00000001; + enc->enc_pic.vui.max_bytes_per_pic_denom = 0x00000002; + enc->enc_pic.vui.max_bits_per_mb_denom = 0x00000001; + enc->enc_pic.vui.log2_max_mv_length_hori = 0x00000010; + enc->enc_pic.vui.log2_max_mv_length_vert = 0x00000010; + enc->enc_pic.vui.num_reorder_frames = 0x00000003; + enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003; } void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - get_rate_control_param(enc, pic); - get_motion_estimation_param(enc, pic); - get_pic_control_param(enc, pic); - get_task_info_param(enc); - get_feedback_buffer_param(enc); - get_vui_param(enc, pic); - get_config_ext_param(enc); - - enc->enc_pic.picture_type = pic->picture_type; - enc->enc_pic.frame_num = pic->frame_num; - enc->enc_pic.frame_num_cnt = pic->frame_num_cnt; - enc->enc_pic.p_remain = pic->p_remain; - enc->enc_pic.i_remain = pic->i_remain; - enc->enc_pic.gop_cnt = pic->gop_cnt; - enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; - enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; - enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; - enc->enc_pic.not_referenced = pic->not_referenced; - if (enc->dual_inst) - enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201; - else - enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201; - enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); + get_rate_control_param(enc, pic); + get_motion_estimation_param(enc, pic); + get_pic_control_param(enc, pic); + get_task_info_param(enc); + get_feedback_buffer_param(enc); + get_vui_param(enc, pic); + get_config_ext_param(enc); + + enc->enc_pic.picture_type = pic->picture_type; + enc->enc_pic.frame_num = pic->frame_num; + enc->enc_pic.frame_num_cnt = pic->frame_num_cnt; + enc->enc_pic.p_remain = pic->p_remain; + enc->enc_pic.i_remain = pic->i_remain; + enc->enc_pic.gop_cnt = pic->gop_cnt; + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; + enc->enc_pic.not_referenced = pic->not_referenced; + if (enc->dual_inst) + enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201; + else + enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); } static void create(struct rvce_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - enc->task_info(enc, 0x00000000, 0, 0, 0); - - RVCE_BEGIN(0x01000001); // create cmd - RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer); - RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile - RVCE_CS(enc->base.level); // encLevel - RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction); - RVCE_CS(enc->base.width); // encImageWidth - RVCE_CS(enc->base.height); // encImageHeight - - if (sscreen->info.chip_class < GFX9) { - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw - } else { - RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // encRefYHeightInQw - } - - RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants); - - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_luma_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_chroma_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity); - RVCE_END(); + struct si_screen *sscreen = (struct si_screen *)enc->screen; + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer); + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction); + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + + if (sscreen->info.chip_class < GFX9) { + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw + } else { + RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // encRefYHeightInQw + } + + RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants); + + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_luma_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_chroma_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity); + RVCE_END(); } static void encode(struct rvce_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - signed luma_offset, chroma_offset, bs_offset; - unsigned dep, bs_idx = enc->bs_idx++; - int i; - - if (enc->dual_inst) { - if (bs_idx == 0) - dep = 1; - else if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - dep = 0; - else - dep = 2; - } else - dep = 0; - - enc->task_info(enc, 0x00000003, dep, 0, bs_idx); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo - RVCE_END(); - - bs_offset = -(signed)(bs_idx * enc->bs_size); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - if (enc->dual_pipe) { - unsigned aux_offset = enc->cpb.res->buf->size - - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - RVCE_BEGIN(0x05000002); // auxiliary buffer - for (i = 0; i < 8; ++i) { - RVCE_CS(aux_offset); - aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; - } - for (i = 0; i < 8; ++i) - RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); - RVCE_END(); - } - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(enc->enc_pic.frame_num ? 0x0 : 0x11); // insertHeaders - RVCE_CS(enc->enc_pic.eo.picture_structure); - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(enc->enc_pic.eo.force_refresh_map); - RVCE_CS(enc->enc_pic.eo.insert_aud); - RVCE_CS(enc->enc_pic.eo.end_of_sequence); - RVCE_CS(enc->enc_pic.eo.end_of_stream); - - if (sscreen->info.chip_class < GFX9) { - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - } else { - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.gfx9.surf_offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.gfx9.surf_offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encInputPicChromaPitch - } - - if (enc->dual_pipe) - enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00000000; - else - enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00010000; - RVCE_CS(enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload); - RVCE_CS(enc->enc_pic.eo.enc_input_pic_tile_config); - RVCE_CS(enc->enc_pic.picture_type); // encPicType - RVCE_CS(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.eo.enc_idr_pic_id !=0)) - enc->enc_pic.eo.enc_idr_pic_id = enc->enc_pic.idr_pic_id - 1; - else - enc->enc_pic.eo.enc_idr_pic_id = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_idr_pic_id); - RVCE_CS(enc->enc_pic.eo.enc_mgs_key_pic); - RVCE_CS(!enc->enc_pic.not_referenced); - RVCE_CS(enc->enc_pic.eo.enc_temporal_layer_index); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_active_override_flag); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_l0_active_minus1); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_l1_active_minus1); - - i = enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0; - if (i > 1 && enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000001; - enc->enc_pic.eo.enc_ref_list_modification_num = i - 1; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } else { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; - enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } - - for (i = 0; i < 3; ++i) { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; - enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } - for (i = 0; i < 4; ++i) { - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_op); - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_num); - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_idx); - RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_op); - RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_num); - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); - RVCE_CS(l0->frame_num); - RVCE_CS(l0->pic_order_cnt); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - } else { - enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l0_frame_number = 0x00000000; - enc->enc_pic.eo.l0_picture_order_count = 0x00000000; - enc->enc_pic.eo.l0_luma_offset = 0xffffffff; - enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l0_frame_number); - RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l0_luma_offset); - RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); - } - - // encReferencePictureL0[1] - enc->enc_pic.eo.l0_picture_structure = 0x00000000; - enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l0_frame_number = 0x00000000; - enc->enc_pic.eo.l0_picture_order_count = 0x00000000; - enc->enc_pic.eo.l0_luma_offset = 0xffffffff; - enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l0_picture_structure); - RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l0_frame_number); - RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l0_luma_offset); - RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); - RVCE_CS(l1->frame_num); - RVCE_CS(l1->pic_order_cnt); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - } else { - enc->enc_pic.eo.l1_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l1_frame_number = 0x00000000; - enc->enc_pic.eo.l1_picture_order_count = 0x00000000; - enc->enc_pic.eo.l1_luma_offset = 0xffffffff; - enc->enc_pic.eo.l1_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l1_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l1_frame_number); - RVCE_CS(enc->enc_pic.eo.l1_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l1_luma_offset); - RVCE_CS(enc->enc_pic.eo.l1_chroma_offset); - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset); - RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_luma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_chroma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_luma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_chroma_offset); - RVCE_CS(enc->enc_pic.frame_num_cnt-1); - RVCE_CS(enc->enc_pic.frame_num); - RVCE_CS(enc->enc_pic.pic_order_cnt); - RVCE_CS(enc->enc_pic.i_remain); - RVCE_CS(enc->enc_pic.p_remain); - RVCE_CS(enc->enc_pic.eo.num_b_pic_remain_in_rcgop); - RVCE_CS(enc->enc_pic.eo.num_ir_pic_remain_in_rcgop); - RVCE_CS(enc->enc_pic.eo.enable_intra_refresh); - - RVCE_CS(enc->enc_pic.eo.aq_variance_en); - RVCE_CS(enc->enc_pic.eo.aq_block_size); - RVCE_CS(enc->enc_pic.eo.aq_mb_variance_sel); - RVCE_CS(enc->enc_pic.eo.aq_frame_variance_sel); - RVCE_CS(enc->enc_pic.eo.aq_param_a); - RVCE_CS(enc->enc_pic.eo.aq_param_b); - RVCE_CS(enc->enc_pic.eo.aq_param_c); - RVCE_CS(enc->enc_pic.eo.aq_param_d); - RVCE_CS(enc->enc_pic.eo.aq_param_e); - - RVCE_CS(enc->enc_pic.eo.context_in_sfb); - RVCE_END(); + struct si_screen *sscreen = (struct si_screen *)enc->screen; + signed luma_offset, chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = + enc->cpb.res->buf->size - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->enc_pic.frame_num ? 0x0 : 0x11); // insertHeaders + RVCE_CS(enc->enc_pic.eo.picture_structure); + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(enc->enc_pic.eo.force_refresh_map); + RVCE_CS(enc->enc_pic.eo.insert_aud); + RVCE_CS(enc->enc_pic.eo.end_of_sequence); + RVCE_CS(enc->enc_pic.eo.end_of_stream); + + if (sscreen->info.chip_class < GFX9) { + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + } else { + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.gfx9.surf_offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.gfx9.surf_offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encInputPicChromaPitch + } + + if (enc->dual_pipe) + enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00000000; + else + enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00010000; + RVCE_CS(enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload); + RVCE_CS(enc->enc_pic.eo.enc_input_pic_tile_config); + RVCE_CS(enc->enc_pic.picture_type); // encPicType + RVCE_CS(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) && + (enc->enc_pic.eo.enc_idr_pic_id != 0)) + enc->enc_pic.eo.enc_idr_pic_id = enc->enc_pic.idr_pic_id - 1; + else + enc->enc_pic.eo.enc_idr_pic_id = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_idr_pic_id); + RVCE_CS(enc->enc_pic.eo.enc_mgs_key_pic); + RVCE_CS(!enc->enc_pic.not_referenced); + RVCE_CS(enc->enc_pic.eo.enc_temporal_layer_index); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_active_override_flag); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_l0_active_minus1); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_l1_active_minus1); + + i = enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0; + if (i > 1 && enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000001; + enc->enc_pic.eo.enc_ref_list_modification_num = i - 1; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } else { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; + enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } + + for (i = 0; i < 3; ++i) { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; + enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } + for (i = 0; i < 4; ++i) { + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_op); + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_num); + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_idx); + RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_op); + RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_num); + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); + RVCE_CS(l0->frame_num); + RVCE_CS(l0->pic_order_cnt); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + } else { + enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l0_frame_number = 0x00000000; + enc->enc_pic.eo.l0_picture_order_count = 0x00000000; + enc->enc_pic.eo.l0_luma_offset = 0xffffffff; + enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l0_frame_number); + RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l0_luma_offset); + RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); + } + + // encReferencePictureL0[1] + enc->enc_pic.eo.l0_picture_structure = 0x00000000; + enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l0_frame_number = 0x00000000; + enc->enc_pic.eo.l0_picture_order_count = 0x00000000; + enc->enc_pic.eo.l0_luma_offset = 0xffffffff; + enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l0_picture_structure); + RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l0_frame_number); + RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l0_luma_offset); + RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); + RVCE_CS(l1->frame_num); + RVCE_CS(l1->pic_order_cnt); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + } else { + enc->enc_pic.eo.l1_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l1_frame_number = 0x00000000; + enc->enc_pic.eo.l1_picture_order_count = 0x00000000; + enc->enc_pic.eo.l1_luma_offset = 0xffffffff; + enc->enc_pic.eo.l1_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l1_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l1_frame_number); + RVCE_CS(enc->enc_pic.eo.l1_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l1_luma_offset); + RVCE_CS(enc->enc_pic.eo.l1_chroma_offset); + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset); + RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_luma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_chroma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_luma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_chroma_offset); + RVCE_CS(enc->enc_pic.frame_num_cnt - 1); + RVCE_CS(enc->enc_pic.frame_num); + RVCE_CS(enc->enc_pic.pic_order_cnt); + RVCE_CS(enc->enc_pic.i_remain); + RVCE_CS(enc->enc_pic.p_remain); + RVCE_CS(enc->enc_pic.eo.num_b_pic_remain_in_rcgop); + RVCE_CS(enc->enc_pic.eo.num_ir_pic_remain_in_rcgop); + RVCE_CS(enc->enc_pic.eo.enable_intra_refresh); + + RVCE_CS(enc->enc_pic.eo.aq_variance_en); + RVCE_CS(enc->enc_pic.eo.aq_block_size); + RVCE_CS(enc->enc_pic.eo.aq_mb_variance_sel); + RVCE_CS(enc->enc_pic.eo.aq_frame_variance_sel); + RVCE_CS(enc->enc_pic.eo.aq_param_a); + RVCE_CS(enc->enc_pic.eo.aq_param_b); + RVCE_CS(enc->enc_pic.eo.aq_param_c); + RVCE_CS(enc->enc_pic.eo.aq_param_d); + RVCE_CS(enc->enc_pic.eo.aq_param_e); + + RVCE_CS(enc->enc_pic.eo.context_in_sfb); + RVCE_END(); } static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->enc_pic.rc.rc_method); - RVCE_CS(enc->enc_pic.rc.target_bitrate); - RVCE_CS(enc->enc_pic.rc.peak_bitrate); - RVCE_CS(enc->enc_pic.rc.frame_rate_num); - RVCE_CS(enc->enc_pic.rc.gop_size); - RVCE_CS(enc->enc_pic.rc.quant_i_frames); - RVCE_CS(enc->enc_pic.rc.quant_p_frames); - RVCE_CS(enc->enc_pic.rc.quant_b_frames); - RVCE_CS(enc->enc_pic.rc.vbv_buffer_size); - RVCE_CS(enc->enc_pic.rc.frame_rate_den); - RVCE_CS(enc->enc_pic.rc.vbv_buf_lv); - RVCE_CS(enc->enc_pic.rc.max_au_size); - RVCE_CS(enc->enc_pic.rc.qp_initial_mode); - RVCE_CS(enc->enc_pic.rc.target_bits_picture); - RVCE_CS(enc->enc_pic.rc.peak_bits_picture_integer); - RVCE_CS(enc->enc_pic.rc.peak_bits_picture_fraction); - RVCE_CS(enc->enc_pic.rc.min_qp); - RVCE_CS(enc->enc_pic.rc.max_qp); - RVCE_CS(enc->enc_pic.rc.skip_frame_enable); - RVCE_CS(enc->enc_pic.rc.fill_data_enable); - RVCE_CS(enc->enc_pic.rc.enforce_hrd); - RVCE_CS(enc->enc_pic.rc.b_pics_delta_qp); - RVCE_CS(enc->enc_pic.rc.ref_b_pics_delta_qp); - RVCE_CS(enc->enc_pic.rc.rc_reinit_disable); - RVCE_CS(enc->enc_pic.rc.enc_lcvbr_init_qp_flag); - RVCE_CS(enc->enc_pic.rc.lcvbrsatd_based_nonlinear_bit_budget_flag); - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->enc_pic.rc.rc_method); + RVCE_CS(enc->enc_pic.rc.target_bitrate); + RVCE_CS(enc->enc_pic.rc.peak_bitrate); + RVCE_CS(enc->enc_pic.rc.frame_rate_num); + RVCE_CS(enc->enc_pic.rc.gop_size); + RVCE_CS(enc->enc_pic.rc.quant_i_frames); + RVCE_CS(enc->enc_pic.rc.quant_p_frames); + RVCE_CS(enc->enc_pic.rc.quant_b_frames); + RVCE_CS(enc->enc_pic.rc.vbv_buffer_size); + RVCE_CS(enc->enc_pic.rc.frame_rate_den); + RVCE_CS(enc->enc_pic.rc.vbv_buf_lv); + RVCE_CS(enc->enc_pic.rc.max_au_size); + RVCE_CS(enc->enc_pic.rc.qp_initial_mode); + RVCE_CS(enc->enc_pic.rc.target_bits_picture); + RVCE_CS(enc->enc_pic.rc.peak_bits_picture_integer); + RVCE_CS(enc->enc_pic.rc.peak_bits_picture_fraction); + RVCE_CS(enc->enc_pic.rc.min_qp); + RVCE_CS(enc->enc_pic.rc.max_qp); + RVCE_CS(enc->enc_pic.rc.skip_frame_enable); + RVCE_CS(enc->enc_pic.rc.fill_data_enable); + RVCE_CS(enc->enc_pic.rc.enforce_hrd); + RVCE_CS(enc->enc_pic.rc.b_pics_delta_qp); + RVCE_CS(enc->enc_pic.rc.ref_b_pics_delta_qp); + RVCE_CS(enc->enc_pic.rc.rc_reinit_disable); + RVCE_CS(enc->enc_pic.rc.enc_lcvbr_init_qp_flag); + RVCE_CS(enc->enc_pic.rc.lcvbrsatd_based_nonlinear_bit_budget_flag); + RVCE_END(); } static void config(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); - enc->rate_control(enc); - enc->config_extension(enc); - enc->motion_estimation(enc); - enc->rdo(enc); - if (enc->use_vui) - enc->vui(enc); - enc->pic_control(enc); + enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); + enc->rate_control(enc); + enc->config_extension(enc); + enc->motion_estimation(enc); + enc->rdo(enc); + if (enc->use_vui) + enc->vui(enc); + enc->pic_control(enc); } static void config_extension(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000001); // config extension - RVCE_CS(enc->enc_pic.ce.enc_enable_perf_logging); - RVCE_END(); + RVCE_BEGIN(0x04000001); // config extension + RVCE_CS(enc->enc_pic.ce.enc_enable_perf_logging); + RVCE_END(); } static void feedback(struct rvce_encoder *enc) { - RVCE_BEGIN(0x05000005); // feedback buffer - RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo - RVCE_CS(enc->enc_pic.fb.feedback_ring_size); - RVCE_END(); + RVCE_BEGIN(0x05000005); // feedback buffer + RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo + RVCE_CS(enc->enc_pic.fb.feedback_ring_size); + RVCE_END(); } static void destroy(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000001, 0, 0, 0); + enc->task_info(enc, 0x00000001, 0, 0, 0); - feedback(enc); + feedback(enc); - RVCE_BEGIN(0x02000001); // destroy - RVCE_END(); + RVCE_BEGIN(0x02000001); // destroy + RVCE_END(); } static void motion_estimation(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000007); // motion estimation - RVCE_CS(enc->enc_pic.me.enc_ime_decimation_search); - RVCE_CS(enc->enc_pic.me.motion_est_half_pixel); - RVCE_CS(enc->enc_pic.me.motion_est_quarter_pixel); - RVCE_CS(enc->enc_pic.me.disable_favor_pmv_point); - RVCE_CS(enc->enc_pic.me.force_zero_point_center); - RVCE_CS(enc->enc_pic.me.lsmvert); - RVCE_CS(enc->enc_pic.me.enc_search_range_x); - RVCE_CS(enc->enc_pic.me.enc_search_range_y); - RVCE_CS(enc->enc_pic.me.enc_search1_range_x); - RVCE_CS(enc->enc_pic.me.enc_search1_range_y); - RVCE_CS(enc->enc_pic.me.disable_16x16_frame1); - RVCE_CS(enc->enc_pic.me.disable_satd); - RVCE_CS(enc->enc_pic.me.enable_amd); - RVCE_CS(enc->enc_pic.me.enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.enc_ime_skip_x); - RVCE_CS(enc->enc_pic.me.enc_ime_skip_y); - RVCE_CS(enc->enc_pic.me.enc_en_ime_overw_dis_subm); - RVCE_CS(enc->enc_pic.me.enc_ime_overw_dis_subm_no); - RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_x); - RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_y); - RVCE_CS(enc->enc_pic.me.parallel_mode_speedup_enable); - RVCE_CS(enc->enc_pic.me.fme0_enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.fme1_enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.ime_sw_speedup_enable); - RVCE_END(); + RVCE_BEGIN(0x04000007); // motion estimation + RVCE_CS(enc->enc_pic.me.enc_ime_decimation_search); + RVCE_CS(enc->enc_pic.me.motion_est_half_pixel); + RVCE_CS(enc->enc_pic.me.motion_est_quarter_pixel); + RVCE_CS(enc->enc_pic.me.disable_favor_pmv_point); + RVCE_CS(enc->enc_pic.me.force_zero_point_center); + RVCE_CS(enc->enc_pic.me.lsmvert); + RVCE_CS(enc->enc_pic.me.enc_search_range_x); + RVCE_CS(enc->enc_pic.me.enc_search_range_y); + RVCE_CS(enc->enc_pic.me.enc_search1_range_x); + RVCE_CS(enc->enc_pic.me.enc_search1_range_y); + RVCE_CS(enc->enc_pic.me.disable_16x16_frame1); + RVCE_CS(enc->enc_pic.me.disable_satd); + RVCE_CS(enc->enc_pic.me.enable_amd); + RVCE_CS(enc->enc_pic.me.enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.enc_ime_skip_x); + RVCE_CS(enc->enc_pic.me.enc_ime_skip_y); + RVCE_CS(enc->enc_pic.me.enc_en_ime_overw_dis_subm); + RVCE_CS(enc->enc_pic.me.enc_ime_overw_dis_subm_no); + RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_x); + RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_y); + RVCE_CS(enc->enc_pic.me.parallel_mode_speedup_enable); + RVCE_CS(enc->enc_pic.me.fme0_enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.fme1_enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.ime_sw_speedup_enable); + RVCE_END(); } static void pic_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000002); // pic control - RVCE_CS(enc->enc_pic.pc.enc_use_constrained_intra_pred); - RVCE_CS(enc->enc_pic.pc.enc_cabac_enable); - RVCE_CS(enc->enc_pic.pc.enc_cabac_idc); - RVCE_CS(enc->enc_pic.pc.enc_loop_filter_disable); - RVCE_CS(enc->enc_pic.pc.enc_lf_beta_offset); - RVCE_CS(enc->enc_pic.pc.enc_lf_alpha_c0_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_left_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_right_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_top_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_bottom_offset); - RVCE_CS(enc->enc_pic.pc.enc_num_mbs_per_slice); - RVCE_CS(enc->enc_pic.pc.enc_intra_refresh_num_mbs_per_slot); - RVCE_CS(enc->enc_pic.pc.enc_force_intra_refresh); - RVCE_CS(enc->enc_pic.pc.enc_force_imb_period); - RVCE_CS(enc->enc_pic.pc.enc_pic_order_cnt_type); - RVCE_CS(enc->enc_pic.pc.log2_max_pic_order_cnt_lsb_minus4); - RVCE_CS(enc->enc_pic.pc.enc_sps_id); - RVCE_CS(enc->enc_pic.pc.enc_pps_id); - RVCE_CS(enc->enc_pic.pc.enc_constraint_set_flags); - RVCE_CS(enc->enc_pic.pc.enc_b_pic_pattern); - RVCE_CS(enc->enc_pic.pc.weight_pred_mode_b_picture); - RVCE_CS(enc->enc_pic.pc.enc_number_of_reference_frames); - RVCE_CS(enc->enc_pic.pc.enc_max_num_ref_frames); - RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l0); - RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l1); - RVCE_CS(enc->enc_pic.pc.enc_slice_mode); - RVCE_CS(enc->enc_pic.pc.enc_max_slice_size); - RVCE_END(); + RVCE_BEGIN(0x04000002); // pic control + RVCE_CS(enc->enc_pic.pc.enc_use_constrained_intra_pred); + RVCE_CS(enc->enc_pic.pc.enc_cabac_enable); + RVCE_CS(enc->enc_pic.pc.enc_cabac_idc); + RVCE_CS(enc->enc_pic.pc.enc_loop_filter_disable); + RVCE_CS(enc->enc_pic.pc.enc_lf_beta_offset); + RVCE_CS(enc->enc_pic.pc.enc_lf_alpha_c0_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_left_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_right_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_top_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_bottom_offset); + RVCE_CS(enc->enc_pic.pc.enc_num_mbs_per_slice); + RVCE_CS(enc->enc_pic.pc.enc_intra_refresh_num_mbs_per_slot); + RVCE_CS(enc->enc_pic.pc.enc_force_intra_refresh); + RVCE_CS(enc->enc_pic.pc.enc_force_imb_period); + RVCE_CS(enc->enc_pic.pc.enc_pic_order_cnt_type); + RVCE_CS(enc->enc_pic.pc.log2_max_pic_order_cnt_lsb_minus4); + RVCE_CS(enc->enc_pic.pc.enc_sps_id); + RVCE_CS(enc->enc_pic.pc.enc_pps_id); + RVCE_CS(enc->enc_pic.pc.enc_constraint_set_flags); + RVCE_CS(enc->enc_pic.pc.enc_b_pic_pattern); + RVCE_CS(enc->enc_pic.pc.weight_pred_mode_b_picture); + RVCE_CS(enc->enc_pic.pc.enc_number_of_reference_frames); + RVCE_CS(enc->enc_pic.pc.enc_max_num_ref_frames); + RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l0); + RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l1); + RVCE_CS(enc->enc_pic.pc.enc_slice_mode); + RVCE_CS(enc->enc_pic.pc.enc_max_slice_size); + RVCE_END(); } static void rdo(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000008); // rdo - RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_i_frame); - RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_p_frame); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv_1); - RVCE_CS(enc->enc_pic.rdo.enc_16x16_cost_adj); - RVCE_CS(enc->enc_pic.rdo.enc_skip_cost_adj); - RVCE_CS(enc->enc_pic.rdo.enc_force_16x16_skip); - RVCE_CS(enc->enc_pic.rdo.enc_disable_threshold_calc_a); - RVCE_CS(enc->enc_pic.rdo.enc_luma_coeff_cost); - RVCE_CS(enc->enc_pic.rdo.enc_luma_mb_coeff_cost); - RVCE_CS(enc->enc_pic.rdo.enc_chroma_coeff_cost); - RVCE_END(); + RVCE_BEGIN(0x04000008); // rdo + RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_i_frame); + RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_p_frame); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv_1); + RVCE_CS(enc->enc_pic.rdo.enc_16x16_cost_adj); + RVCE_CS(enc->enc_pic.rdo.enc_skip_cost_adj); + RVCE_CS(enc->enc_pic.rdo.enc_force_16x16_skip); + RVCE_CS(enc->enc_pic.rdo.enc_disable_threshold_calc_a); + RVCE_CS(enc->enc_pic.rdo.enc_luma_coeff_cost); + RVCE_CS(enc->enc_pic.rdo.enc_luma_mb_coeff_cost); + RVCE_CS(enc->enc_pic.rdo.enc_chroma_coeff_cost); + RVCE_END(); } static void session(struct rvce_encoder *enc) { - RVCE_BEGIN(0x00000001); // session cmd - RVCE_CS(enc->stream_handle); - RVCE_END(); + RVCE_BEGIN(0x00000001); // session cmd + RVCE_CS(enc->stream_handle); + RVCE_END(); } -static void task_info(struct rvce_encoder *enc, uint32_t op, - uint32_t dep, uint32_t fb_idx, uint32_t ring_idx) +static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx) { - RVCE_BEGIN(0x00000002); // task info - if (op == 0x3) { - if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; - // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; - } - enc->task_info_idx = enc->cs->current.cdw; - } - enc->enc_pic.ti.task_operation = op; - enc->enc_pic.ti.reference_picture_dependency = dep; - enc->enc_pic.ti.feedback_index = fb_idx; - enc->enc_pic.ti.video_bitstream_ring_index = ring_idx; - RVCE_CS(enc->enc_pic.ti.offset_of_next_task_info); - RVCE_CS(enc->enc_pic.ti.task_operation); - RVCE_CS(enc->enc_pic.ti.reference_picture_dependency); - RVCE_CS(enc->enc_pic.ti.collocate_flag_dependency); - RVCE_CS(enc->enc_pic.ti.feedback_index); - RVCE_CS(enc->enc_pic.ti.video_bitstream_ring_index); - RVCE_END(); + RVCE_BEGIN(0x00000002); // task info + if (op == 0x3) { + if (enc->task_info_idx) { + uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + // Update offsetOfNextTaskInfo + enc->cs->current.buf[enc->task_info_idx] = offs; + } + enc->task_info_idx = enc->cs->current.cdw; + } + enc->enc_pic.ti.task_operation = op; + enc->enc_pic.ti.reference_picture_dependency = dep; + enc->enc_pic.ti.feedback_index = fb_idx; + enc->enc_pic.ti.video_bitstream_ring_index = ring_idx; + RVCE_CS(enc->enc_pic.ti.offset_of_next_task_info); + RVCE_CS(enc->enc_pic.ti.task_operation); + RVCE_CS(enc->enc_pic.ti.reference_picture_dependency); + RVCE_CS(enc->enc_pic.ti.collocate_flag_dependency); + RVCE_CS(enc->enc_pic.ti.feedback_index); + RVCE_CS(enc->enc_pic.ti.video_bitstream_ring_index); + RVCE_END(); } static void vui(struct rvce_encoder *enc) { - int i; - - if (!enc->enc_pic.enable_vui) - return; - - RVCE_BEGIN(0x04000009); // vui - RVCE_CS(enc->enc_pic.vui.aspect_ratio_info_present_flag); - RVCE_CS(enc->enc_pic.vui.aspect_ratio_idc); - RVCE_CS(enc->enc_pic.vui.sar_width); - RVCE_CS(enc->enc_pic.vui.sar_height); - RVCE_CS(enc->enc_pic.vui.overscan_info_present_flag); - RVCE_CS(enc->enc_pic.vui.overscan_Approp_flag); - RVCE_CS(enc->enc_pic.vui.video_signal_type_present_flag); - RVCE_CS(enc->enc_pic.vui.video_format); - RVCE_CS(enc->enc_pic.vui.video_full_range_flag); - RVCE_CS(enc->enc_pic.vui.color_description_present_flag); - RVCE_CS(enc->enc_pic.vui.color_prim); - RVCE_CS(enc->enc_pic.vui.transfer_char); - RVCE_CS(enc->enc_pic.vui.matrix_coef); - RVCE_CS(enc->enc_pic.vui.chroma_loc_info_present_flag); - RVCE_CS(enc->enc_pic.vui.chroma_loc_top); - RVCE_CS(enc->enc_pic.vui.chroma_loc_bottom); - RVCE_CS(enc->enc_pic.vui.timing_info_present_flag); - RVCE_CS(enc->enc_pic.vui.num_units_in_tick); - RVCE_CS(enc->enc_pic.vui.time_scale); - RVCE_CS(enc->enc_pic.vui.fixed_frame_rate_flag); - RVCE_CS(enc->enc_pic.vui.nal_hrd_parameters_present_flag); - RVCE_CS(enc->enc_pic.vui.cpb_cnt_minus1); - RVCE_CS(enc->enc_pic.vui.bit_rate_scale); - RVCE_CS(enc->enc_pic.vui.cpb_size_scale); - for (i = 0; i < 32; i++) { - RVCE_CS(enc->enc_pic.vui.bit_rate_value_minus); - RVCE_CS(enc->enc_pic.vui.cpb_size_value_minus); - RVCE_CS(enc->enc_pic.vui.cbr_flag); - } - RVCE_CS(enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.cpb_removal_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.dpb_output_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.time_offset_length); - RVCE_CS(enc->enc_pic.vui.low_delay_hrd_flag); - RVCE_CS(enc->enc_pic.vui.pic_struct_present_flag); - RVCE_CS(enc->enc_pic.vui.bitstream_restriction_present_flag); - RVCE_CS(enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag); - RVCE_CS(enc->enc_pic.vui.max_bytes_per_pic_denom); - RVCE_CS(enc->enc_pic.vui.max_bits_per_mb_denom); - RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_hori); - RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_vert); - RVCE_CS(enc->enc_pic.vui.num_reorder_frames); - RVCE_CS(enc->enc_pic.vui.max_dec_frame_buffering); - RVCE_END(); + int i; + + if (!enc->enc_pic.enable_vui) + return; + + RVCE_BEGIN(0x04000009); // vui + RVCE_CS(enc->enc_pic.vui.aspect_ratio_info_present_flag); + RVCE_CS(enc->enc_pic.vui.aspect_ratio_idc); + RVCE_CS(enc->enc_pic.vui.sar_width); + RVCE_CS(enc->enc_pic.vui.sar_height); + RVCE_CS(enc->enc_pic.vui.overscan_info_present_flag); + RVCE_CS(enc->enc_pic.vui.overscan_Approp_flag); + RVCE_CS(enc->enc_pic.vui.video_signal_type_present_flag); + RVCE_CS(enc->enc_pic.vui.video_format); + RVCE_CS(enc->enc_pic.vui.video_full_range_flag); + RVCE_CS(enc->enc_pic.vui.color_description_present_flag); + RVCE_CS(enc->enc_pic.vui.color_prim); + RVCE_CS(enc->enc_pic.vui.transfer_char); + RVCE_CS(enc->enc_pic.vui.matrix_coef); + RVCE_CS(enc->enc_pic.vui.chroma_loc_info_present_flag); + RVCE_CS(enc->enc_pic.vui.chroma_loc_top); + RVCE_CS(enc->enc_pic.vui.chroma_loc_bottom); + RVCE_CS(enc->enc_pic.vui.timing_info_present_flag); + RVCE_CS(enc->enc_pic.vui.num_units_in_tick); + RVCE_CS(enc->enc_pic.vui.time_scale); + RVCE_CS(enc->enc_pic.vui.fixed_frame_rate_flag); + RVCE_CS(enc->enc_pic.vui.nal_hrd_parameters_present_flag); + RVCE_CS(enc->enc_pic.vui.cpb_cnt_minus1); + RVCE_CS(enc->enc_pic.vui.bit_rate_scale); + RVCE_CS(enc->enc_pic.vui.cpb_size_scale); + for (i = 0; i < 32; i++) { + RVCE_CS(enc->enc_pic.vui.bit_rate_value_minus); + RVCE_CS(enc->enc_pic.vui.cpb_size_value_minus); + RVCE_CS(enc->enc_pic.vui.cbr_flag); + } + RVCE_CS(enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.cpb_removal_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.dpb_output_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.time_offset_length); + RVCE_CS(enc->enc_pic.vui.low_delay_hrd_flag); + RVCE_CS(enc->enc_pic.vui.pic_struct_present_flag); + RVCE_CS(enc->enc_pic.vui.bitstream_restriction_present_flag); + RVCE_CS(enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag); + RVCE_CS(enc->enc_pic.vui.max_bytes_per_pic_denom); + RVCE_CS(enc->enc_pic.vui.max_bits_per_mb_denom); + RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_hori); + RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_vert); + RVCE_CS(enc->enc_pic.vui.num_reorder_frames); + RVCE_CS(enc->enc_pic.vui.max_dec_frame_buffering); + RVCE_END(); } void si_vce_52_init(struct rvce_encoder *enc) { - enc->session = session; - enc->task_info = task_info; - enc->create = create; - enc->feedback = feedback; - enc->rate_control = rate_control; - enc->config_extension = config_extension; - enc->pic_control = pic_control; - enc->motion_estimation = motion_estimation; - enc->rdo = rdo; - enc->vui = vui; - enc->config = config; - enc->encode = encode; - enc->destroy = destroy; - enc->si_get_pic_param = si_vce_52_get_param; + enc->session = session; + enc->task_info = task_info; + enc->create = create; + enc->feedback = feedback; + enc->rate_control = rate_control; + enc->config_extension = config_extension; + enc->pic_control = pic_control; + enc->motion_estimation = motion_estimation; + enc->rdo = rdo; + enc->vui = vui; + enc->config = config; + enc->encode = encode; + enc->destroy = destroy; + enc->si_get_pic_param = si_vce_52_get_param; } diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index 0f903b1629f..025307121fe 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -25,1267 +25,1235 @@ * **************************************************************************/ -#include <assert.h> -#include <stdio.h> +#include "radeon_vcn_dec.h" #include "pipe/p_video_codec.h" - +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_mpeg12_decoder.h" - -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vcn_dec.h" #include "vl/vl_probs_table.h" -#define FB_BUFFER_OFFSET 0x1000 -#define FB_BUFFER_SIZE 2048 -#define IT_SCALING_TABLE_SIZE 992 -#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256) -#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) +#include <assert.h> +#include <stdio.h> + +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define IT_SCALING_TABLE_SIZE 992 +#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256) +#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) -#define RDECODE_VCN1_GPCOM_VCPU_CMD 0x2070c -#define RDECODE_VCN1_GPCOM_VCPU_DATA0 0x20710 -#define RDECODE_VCN1_GPCOM_VCPU_DATA1 0x20714 -#define RDECODE_VCN1_ENGINE_CNTL 0x20718 +#define RDECODE_VCN1_GPCOM_VCPU_CMD 0x2070c +#define RDECODE_VCN1_GPCOM_VCPU_DATA0 0x20710 +#define RDECODE_VCN1_GPCOM_VCPU_DATA1 0x20714 +#define RDECODE_VCN1_ENGINE_CNTL 0x20718 -#define RDECODE_VCN2_GPCOM_VCPU_CMD (0x503 << 2) -#define RDECODE_VCN2_GPCOM_VCPU_DATA0 (0x504 << 2) -#define RDECODE_VCN2_GPCOM_VCPU_DATA1 (0x505 << 2) -#define RDECODE_VCN2_ENGINE_CNTL (0x506 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_CMD (0x503 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_DATA0 (0x504 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_DATA1 (0x505 << 2) +#define RDECODE_VCN2_ENGINE_CNTL (0x506 << 2) -#define RDECODE_VCN2_5_GPCOM_VCPU_CMD 0x3c -#define RDECODE_VCN2_5_GPCOM_VCPU_DATA0 0x40 -#define RDECODE_VCN2_5_GPCOM_VCPU_DATA1 0x44 -#define RDECODE_VCN2_5_ENGINE_CNTL 0x9b4 +#define RDECODE_VCN2_5_GPCOM_VCPU_CMD 0x3c +#define RDECODE_VCN2_5_GPCOM_VCPU_DATA0 0x40 +#define RDECODE_VCN2_5_GPCOM_VCPU_DATA1 0x44 +#define RDECODE_VCN2_5_ENGINE_CNTL 0x9b4 -#define NUM_MPEG2_REFS 6 -#define NUM_H264_REFS 17 -#define NUM_VC1_REFS 5 -#define NUM_VP9_REFS 8 +#define NUM_MPEG2_REFS 6 +#define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 +#define NUM_VP9_REFS 8 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec, - struct pipe_h264_picture_desc *pic) + struct pipe_h264_picture_desc *pic) { - rvcn_dec_message_avc_t result; - - memset(&result, 0, sizeof(result)); - switch (pic->base.profile) { - case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: - case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: - result.profile = RDECODE_H264_PROFILE_BASELINE; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: - result.profile = RDECODE_H264_PROFILE_MAIN; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - result.profile = RDECODE_H264_PROFILE_HIGH; - break; - - default: - assert(0); - break; - } - - result.level = dec->base.level; - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; - result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; - result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; - result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; - result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; - - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; - result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; - result.log2_max_pic_order_cnt_lsb_minus4 = - pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - - switch (dec->base.chroma_format) { - case PIPE_VIDEO_CHROMA_FORMAT_NONE: - break; - case PIPE_VIDEO_CHROMA_FORMAT_400: - result.chroma_format = 0; - break; - case PIPE_VIDEO_CHROMA_FORMAT_420: - result.chroma_format = 1; - break; - case PIPE_VIDEO_CHROMA_FORMAT_422: - result.chroma_format = 2; - break; - case PIPE_VIDEO_CHROMA_FORMAT_444: - result.chroma_format = 3; - break; - } - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; - result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; - result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; - result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; - result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; - result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; - - result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; - result.slice_group_map_type = pic->pps->slice_group_map_type; - result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; - result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; - result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; - result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; - - memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); - memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); - - memcpy(dec->it, result.scaling_list_4x4, 6*16); - memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); - - result.num_ref_frames = pic->num_ref_frames; - - result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; - result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; - - result.frame_num = pic->frame_num; - memcpy(result.frame_num_list, pic->frame_num_list, 4*16); - result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; - result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; - memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); - - result.decoded_pic_idx = pic->frame_num; - - return result; + rvcn_dec_message_avc_t result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + result.profile = RDECODE_H264_PROFILE_BASELINE; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + result.profile = RDECODE_H264_PROFILE_MAIN; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + result.profile = RDECODE_H264_PROFILE_HIGH; + break; + + default: + assert(0); + break; + } + + result.level = dec->base.level; + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64); + + memcpy(dec->it, result.scaling_list_4x4, 6 * 16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64); + + result.num_ref_frames = pic->num_ref_frames; + + result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + result.frame_num = pic->frame_num; + memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16); + result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; + result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; + memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2); + + result.decoded_pic_idx = pic->frame_num; + + return result; } static void radeon_dec_destroy_associated_data(void *data) { - /* NOOP, since we only use an intptr */ + /* NOOP, since we only use an intptr */ } static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_h265_picture_desc *pic) + struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) { - rvcn_dec_message_hevc_t result; - unsigned i, j; - - memset(&result, 0, sizeof(result)); - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; - result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; - result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; - result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; - result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; - result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; - result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; - result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; - result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) - result.sps_info_flags |= 1 << 9; - if (pic->UseRefPicList == true) - result.sps_info_flags |= 1 << 10; - - result.chroma_format = pic->pps->sps->chroma_format_idc; - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; - result.log2_min_luma_coding_block_size_minus3 = - pic->pps->sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = - pic->pps->sps->log2_min_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = - pic->pps->sps->log2_diff_max_min_transform_block_size; - result.max_transform_hierarchy_depth_inter = - pic->pps->sps->max_transform_hierarchy_depth_inter; - result.max_transform_hierarchy_depth_intra = - pic->pps->sps->max_transform_hierarchy_depth_intra; - result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; - result.pcm_sample_bit_depth_chroma_minus1 = - pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = - pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = - pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; - result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; - result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; - result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; - result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; - result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; - result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; - result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; - result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; - result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; - result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; - result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; - result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; - result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; - result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; - result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; - result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; - result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; - result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; - - result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; - result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; - result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; - result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; - result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; - result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; - result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; - result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; - result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; - result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; - result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; - result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; - result.init_qp_minus26 = pic->pps->init_qp_minus26; - - for (i = 0; i < 19; ++i) - result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; - - for (i = 0; i < 21; ++i) - result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; - - result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; - result.curr_poc = pic->CurrPicOrderCntVal; - - for (i = 0 ; i < 16 ; i++) { - for (j = 0; (pic->ref[j] != NULL) && (j < 16) ; j++) { - if (dec->render_pic_list[i] == pic->ref[j]) - break; - if (j == 15) - dec->render_pic_list[i] = NULL; - else if (pic->ref[j+1] == NULL) - dec->render_pic_list[i] = NULL; - } - } - for (i = 0 ; i < 16 ; i++) { - if (dec->render_pic_list[i] == NULL) { - dec->render_pic_list[i] = target; - result.curr_idx = i; - break; - } - } - - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)result.curr_idx, - &radeon_dec_destroy_associated_data); - - for (i = 0; i < 16; ++i) { - struct pipe_video_buffer *ref = pic->ref[i]; - uintptr_t ref_pic = 0; - - result.poc_list[i] = pic->PicOrderCntVal[i]; - - if (ref) - ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - else - ref_pic = 0x7F; - result.ref_pic_list[i] = ref_pic; - } - - for (i = 0; i < 8; ++i) { - result.ref_pic_set_st_curr_before[i] = 0xFF; - result.ref_pic_set_st_curr_after[i] = 0xFF; - result.ref_pic_set_lt_curr[i] = 0xFF; - } - - for (i = 0; i < pic->NumPocStCurrBefore; ++i) - result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; - - for (i = 0; i < pic->NumPocStCurrAfter; ++i) - result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; - - for (i = 0; i < pic->NumPocLtCurr; ++i) - result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; - - for (i = 0; i < 6; ++i) - result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; - - for (i = 0; i < 2; ++i) - result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; - - memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); - memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); - memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); - memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); - - for (i = 0 ; i < 2 ; i++) { - for (j = 0 ; j < 15 ; j++) - result.direct_reflist[i][j] = pic->RefPicList[i][j]; - } - - if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - if (target->buffer_format == PIPE_FORMAT_P010 || - target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.p010_mode = 0; - result.luma_10to8 = 5; - result.chroma_10to8 = 5; - result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ - result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ - } - } - - return result; + rvcn_dec_message_hevc_t result; + unsigned i, j; + + memset(&result, 0, sizeof(result)); + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_poc = pic->CurrPicOrderCntVal; + + for (i = 0; i < 16; i++) { + for (j = 0; (pic->ref[j] != NULL) && (j < 16); j++) { + if (dec->render_pic_list[i] == pic->ref[j]) + break; + if (j == 15) + dec->render_pic_list[i] = NULL; + else if (pic->ref[j + 1] == NULL) + dec->render_pic_list[i] = NULL; + } + } + for (i = 0; i < 16; i++) { + if (dec->render_pic_list[i] == NULL) { + dec->render_pic_list[i] = target; + result.curr_idx = i; + break; + } + } + + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx, + &radeon_dec_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 15; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ + result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ + } + } + + return result; } static void fill_probs_table(void *ptr) { - rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr; - - memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4)); - memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8)); - memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16)); - memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32)); - memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs)); - memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs)); - memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p)); - memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, sizeof(default_switchable_interp_prob)); - memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs)); - memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs)); - memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs)); - memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p)); - memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p)); - memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p)); - memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32)); - memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16)); - memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8)); - memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints)); - memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components)); - memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t)); + rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr; + + memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4)); + memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8)); + memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16)); + memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32)); + memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs)); + memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs)); + memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p)); + memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, + sizeof(default_switchable_interp_prob)); + memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs)); + memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs)); + memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs)); + memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p)); + memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p)); + memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p)); + memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32)); + memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16)); + memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8)); + memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints)); + memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components)); + memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t)); } static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_vp9_picture_desc *pic) + struct pipe_video_buffer *target, + struct pipe_vp9_picture_desc *pic) { - rvcn_dec_message_vp9_t result; - unsigned i; - - memset(&result, 0, sizeof(result)); - - /* segment table */ - rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs); - - if (pic->picture_parameter.pic_fields.segmentation_enabled) { - for (i = 0; i < 8; ++i) { - prbs->seg.feature_data[i] = - (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) | - ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) | - ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24); - prbs->seg.feature_mask[i] = - (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) | - (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) | - (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) | - (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3); - } - - for (i = 0; i < 7; ++i) - prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i]; - - for (i = 0; i < 3; ++i) - prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; - - prbs->seg.abs_delta = 0; - } else - memset(&prbs->seg, 0, 256); - - result.frame_header_flags = - (pic->picture_parameter.pic_fields.frame_type << - RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.error_resilient_mode << - RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.intra_only << - RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.allow_high_precision_mv << - RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode << - RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.refresh_frame_context << - RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_enabled << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_update_map << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_temporal_update << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.mode_ref_delta_enabled << - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.mode_ref_delta_update << - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; - - result.frame_header_flags |= ((dec->show_frame && - !pic->picture_parameter.pic_fields.error_resilient_mode) - << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; - dec->show_frame = pic->picture_parameter.pic_fields.show_frame; - - result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type; - - result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx; - result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context; - - result.filter_level = pic->picture_parameter.filter_level; - result.sharpness_level = pic->picture_parameter.sharpness_level; - - for (i = 0; i < 8; ++i) - memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2); - - if (pic->picture_parameter.pic_fields.lossless_flag) { - result.base_qindex = 0; - result.y_dc_delta_q = 0; - result.uv_ac_delta_q = 0; - result.uv_dc_delta_q = 0; - } else { - result.base_qindex = pic->picture_parameter.base_qindex; - result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q; - result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q; - result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q; - } - - result.log2_tile_cols = pic->picture_parameter.log2_tile_columns; - result.log2_tile_rows = pic->picture_parameter.log2_tile_rows; - result.chroma_format = 1; - result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 - = (pic->picture_parameter.bit_depth - 8); - - result.vp9_frame_size = align(dec->bs_size, 128); - result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes; - result.compressed_header_size = pic->picture_parameter.first_partition_size; - - assert(dec->base.max_references + 1 <= 16); - - for (i = 0 ; i < 16 ; ++i) { - if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) { - result.curr_pic_idx = - (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base); - break; - } else if (!dec->render_pic_list[i]) { - dec->render_pic_list[i] = target; - result.curr_pic_idx = dec->ref_idx; - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)dec->ref_idx++, - &radeon_dec_destroy_associated_data); - break; - } - } - - for (i = 0 ; i < 8; i++) { - result.ref_frame_map[i] = (pic->ref[i]) ? - (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : - 0x7f; - } - - result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame]; - result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias; - result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame]; - result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias; - result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame]; - result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias; - - if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) { - if (target->buffer_format == PIPE_FORMAT_P010 || - target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.p010_mode = 0; - result.luma_10to8 = 1; - result.chroma_10to8 = 1; - } - } - - return result; + rvcn_dec_message_vp9_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + /* segment table */ + rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs); + + if (pic->picture_parameter.pic_fields.segmentation_enabled) { + for (i = 0; i < 8; ++i) { + prbs->seg.feature_data[i] = + (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) | + ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) | + ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24); + prbs->seg.feature_mask[i] = + (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) | + (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3); + } + + for (i = 0; i < 7; ++i) + prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i]; + + for (i = 0; i < 3; ++i) + prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; + + prbs->seg.abs_delta = 0; + } else + memset(&prbs->seg, 0, 256); + + result.frame_header_flags = (pic->picture_parameter.pic_fields.frame_type + << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.error_resilient_mode + << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.intra_only + << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.allow_high_precision_mv + << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode + << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.refresh_frame_context + << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_enabled + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_update_map + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_temporal_update + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_enabled + << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK; + + result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_update + << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; + + result.frame_header_flags |= + ((dec->show_frame && !pic->picture_parameter.pic_fields.error_resilient_mode) + << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; + dec->show_frame = pic->picture_parameter.pic_fields.show_frame; + + result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type; + + result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx; + result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context; + + result.filter_level = pic->picture_parameter.filter_level; + result.sharpness_level = pic->picture_parameter.sharpness_level; + + for (i = 0; i < 8; ++i) + memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2); + + if (pic->picture_parameter.pic_fields.lossless_flag) { + result.base_qindex = 0; + result.y_dc_delta_q = 0; + result.uv_ac_delta_q = 0; + result.uv_dc_delta_q = 0; + } else { + result.base_qindex = pic->picture_parameter.base_qindex; + result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q; + result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q; + result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q; + } + + result.log2_tile_cols = pic->picture_parameter.log2_tile_columns; + result.log2_tile_rows = pic->picture_parameter.log2_tile_rows; + result.chroma_format = 1; + result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = + (pic->picture_parameter.bit_depth - 8); + + result.vp9_frame_size = align(dec->bs_size, 128); + result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes; + result.compressed_header_size = pic->picture_parameter.first_partition_size; + + assert(dec->base.max_references + 1 <= 16); + + for (i = 0; i < 16; ++i) { + if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) { + result.curr_pic_idx = (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base); + break; + } else if (!dec->render_pic_list[i]) { + dec->render_pic_list[i] = target; + result.curr_pic_idx = dec->ref_idx; + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)dec->ref_idx++, + &radeon_dec_destroy_associated_data); + break; + } + } + + for (i = 0; i < 8; i++) { + result.ref_frame_map[i] = + (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) + : 0x7f; + } + + result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame]; + result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias; + result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame]; + result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias; + result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame]; + result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias; + + if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 1; + result.chroma_10to8 = 1; + } + } + + return result; } static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) { - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - width = align (width, 16); - height = align (height, 16); - return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; + width = align(width, 16); + height = align(height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } -static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic) +static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, + struct pipe_h265_picture_desc *pic) { - unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; - unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; - unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || - pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = + (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); - context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); - max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned)ceil(height * 8 / 2048.0); - cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; - db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); - return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; } static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic) { - rvcn_dec_message_vc1_t result; - - memset(&result, 0, sizeof(result)); - switch(pic->base.profile) { - case PIPE_VIDEO_PROFILE_VC1_SIMPLE: - result.profile = RDECODE_VC1_PROFILE_SIMPLE; - result.level = 1; - break; - - case PIPE_VIDEO_PROFILE_VC1_MAIN: - result.profile = RDECODE_VC1_PROFILE_MAIN; - result.level = 2; - break; - - case PIPE_VIDEO_PROFILE_VC1_ADVANCED: - result.profile = RDECODE_VC1_PROFILE_ADVANCED; - result.level = 4; - break; - - default: - assert(0); - } - - result.sps_info_flags |= pic->postprocflag << 7; - result.sps_info_flags |= pic->pulldown << 6; - result.sps_info_flags |= pic->interlace << 5; - result.sps_info_flags |= pic->tfcntrflag << 4; - result.sps_info_flags |= pic->finterpflag << 3; - result.sps_info_flags |= pic->psf << 1; - - result.pps_info_flags |= pic->range_mapy_flag << 31; - result.pps_info_flags |= pic->range_mapy << 28; - result.pps_info_flags |= pic->range_mapuv_flag << 27; - result.pps_info_flags |= pic->range_mapuv << 24; - result.pps_info_flags |= pic->multires << 21; - result.pps_info_flags |= pic->maxbframes << 16; - result.pps_info_flags |= pic->overlap << 11; - result.pps_info_flags |= pic->quantizer << 9; - result.pps_info_flags |= pic->panscan_flag << 7; - result.pps_info_flags |= pic->refdist_flag << 6; - result.pps_info_flags |= pic->vstransform << 0; - - if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { - result.pps_info_flags |= pic->syncmarker << 20; - result.pps_info_flags |= pic->rangered << 19; - result.pps_info_flags |= pic->loopfilter << 5; - result.pps_info_flags |= pic->fastuvmc << 4; - result.pps_info_flags |= pic->extended_mv << 3; - result.pps_info_flags |= pic->extended_dmv << 8; - result.pps_info_flags |= pic->dquant << 1; - } - - result.chroma_format = 1; - - return result; + rvcn_dec_message_vc1_t result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + result.profile = RDECODE_VC1_PROFILE_SIMPLE; + result.level = 1; + break; + + case PIPE_VIDEO_PROFILE_VC1_MAIN: + result.profile = RDECODE_VC1_PROFILE_MAIN; + result.level = 2; + break; + + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + result.profile = RDECODE_VC1_PROFILE_ADVANCED; + result.level = 4; + break; + + default: + assert(0); + } + + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; + + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { + result.pps_info_flags |= pic->syncmarker << 20; + result.pps_info_flags |= pic->rangered << 19; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; + } + + result.chroma_format = 1; + + return result; } static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref) { - uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; - uint32_t max = MAX2(dec->frame_number, 1) - 1; - uintptr_t frame; + uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; + uintptr_t frame; - /* seems to be the most sane fallback */ - if (!ref) - return max; + /* seems to be the most sane fallback */ + if (!ref) + return max; - /* get the frame number from the associated data */ - frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + /* get the frame number from the associated data */ + frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - /* limit the frame number to a valid range */ - return MAX2(MIN2(frame, max), min); + /* limit the frame number to a valid range */ + return MAX2(MIN2(frame, max), min); } static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec, - struct pipe_mpeg12_picture_desc *pic) + struct pipe_mpeg12_picture_desc *pic) { - const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; - rvcn_dec_message_mpeg2_vld_t result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - - result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); - result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); - - if(pic->intra_matrix) { - result.load_intra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; - } - } - if(pic->non_intra_matrix) { - result.load_nonintra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; - } - } - - result.profile_and_level_indication = 0; - result.chroma_format = 0x1; - - result.picture_coding_type = pic->picture_coding_type; - result.f_code[0][0] = pic->f_code[0][0] + 1; - result.f_code[0][1] = pic->f_code[0][1] + 1; - result.f_code[1][0] = pic->f_code[1][0] + 1; - result.f_code[1][1] = pic->f_code[1][1] + 1; - result.intra_dc_precision = pic->intra_dc_precision; - result.pic_structure = pic->picture_structure; - result.top_field_first = pic->top_field_first; - result.frame_pred_frame_dct = pic->frame_pred_frame_dct; - result.concealment_motion_vectors = pic->concealment_motion_vectors; - result.q_scale_type = pic->q_scale_type; - result.intra_vlc_format = pic->intra_vlc_format; - result.alternate_scan = pic->alternate_scan; - - return result; + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + rvcn_dec_message_mpeg2_vld_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + + result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); + result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); + + if (pic->intra_matrix) { + result.load_intra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + } + } + if (pic->non_intra_matrix) { + result.load_nonintra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } + } + + result.profile_and_level_indication = 0; + result.chroma_format = 0x1; + + result.picture_coding_type = pic->picture_coding_type; + result.f_code[0][0] = pic->f_code[0][0] + 1; + result.f_code[0][1] = pic->f_code[0][1] + 1; + result.f_code[1][0] = pic->f_code[1][0] + 1; + result.f_code[1][1] = pic->f_code[1][1] + 1; + result.intra_dc_precision = pic->intra_dc_precision; + result.pic_structure = pic->picture_structure; + result.top_field_first = pic->top_field_first; + result.frame_pred_frame_dct = pic->frame_pred_frame_dct; + result.concealment_motion_vectors = pic->concealment_motion_vectors; + result.q_scale_type = pic->q_scale_type; + result.intra_vlc_format = pic->intra_vlc_format; + result.alternate_scan = pic->alternate_scan; + + return result; } static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec, - struct pipe_mpeg4_picture_desc *pic) + struct pipe_mpeg4_picture_desc *pic) { - rvcn_dec_message_mpeg4_asp_vld_t result; - unsigned i; + rvcn_dec_message_mpeg4_asp_vld_t result; + unsigned i; - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; - result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); - result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); + result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); + result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); - result.variant_type = 0; - result.profile_and_level_indication = 0xF0; + result.variant_type = 0; + result.profile_and_level_indication = 0xF0; - result.video_object_layer_verid = 0x5; - result.video_object_layer_shape = 0x0; + result.video_object_layer_verid = 0x5; + result.video_object_layer_shape = 0x0; - result.video_object_layer_width = dec->base.width; - result.video_object_layer_height = dec->base.height; + result.video_object_layer_width = dec->base.width; + result.video_object_layer_height = dec->base.height; - result.vop_time_increment_resolution = pic->vop_time_increment_resolution; + result.vop_time_increment_resolution = pic->vop_time_increment_resolution; - result.short_video_header = pic->short_video_header; - result.interlaced = pic->interlaced; - result.load_intra_quant_mat = 1; - result.load_nonintra_quant_mat = 1; - result.quarter_sample = pic->quarter_sample; - result.complexity_estimation_disable = 1; - result.resync_marker_disable = pic->resync_marker_disable; - result.newpred_enable = 0; - result.reduced_resolution_vop_enable = 0; + result.short_video_header = pic->short_video_header; + result.interlaced = pic->interlaced; + result.load_intra_quant_mat = 1; + result.load_nonintra_quant_mat = 1; + result.quarter_sample = pic->quarter_sample; + result.complexity_estimation_disable = 1; + result.resync_marker_disable = pic->resync_marker_disable; + result.newpred_enable = 0; + result.reduced_resolution_vop_enable = 0; - result.quant_type = pic->quant_type; + result.quant_type = pic->quant_type; - for (i = 0; i < 64; ++i) { - result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; - result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; - } + for (i = 0; i < 64; ++i) { + result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; + result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; + } - return result; + return result; } static void rvcn_dec_message_create(struct radeon_decoder *dec) { - rvcn_dec_message_header_t *header = dec->msg; - rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t); - unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); - - memset(dec->msg, 0, sizes); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizes; - header->num_buffers = 1; - header->msg_type = RDECODE_MSG_CREATE; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = 0; - - header->index[0].message_id = RDECODE_MESSAGE_CREATE; - header->index[0].offset = sizeof(rvcn_dec_message_header_t); - header->index[0].size = sizeof(rvcn_dec_message_create_t); - header->index[0].filled = 0; - - create->stream_type = dec->stream_type; - create->session_flags = 0; - create->width_in_samples = dec->base.width; - create->height_in_samples = dec->base.height; + rvcn_dec_message_header_t *header = dec->msg; + rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t); + unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); + + memset(dec->msg, 0, sizes); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizes; + header->num_buffers = 1; + header->msg_type = RDECODE_MSG_CREATE; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = 0; + + header->index[0].message_id = RDECODE_MESSAGE_CREATE; + header->index[0].offset = sizeof(rvcn_dec_message_header_t); + header->index[0].size = sizeof(rvcn_dec_message_create_t); + header->index[0].filled = 0; + + create->stream_type = dec->stream_type; + create->session_flags = 0; + create->width_in_samples = dec->base.width; + create->height_in_samples = dec->base.height; } static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct si_texture *luma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[0]; - struct si_texture *chroma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[1]; - rvcn_dec_message_header_t *header; - rvcn_dec_message_index_t *index; - rvcn_dec_message_decode_t *decode; - unsigned sizes = 0, offset_decode, offset_codec; - void *codec; - - header = dec->msg; - sizes += sizeof(rvcn_dec_message_header_t); - index = (void*)header + sizeof(rvcn_dec_message_header_t); - sizes += sizeof(rvcn_dec_message_index_t); - offset_decode = sizes; - decode = (void*)index + sizeof(rvcn_dec_message_index_t); - sizes += sizeof(rvcn_dec_message_decode_t); - offset_codec = sizes; - codec = (void*)decode + sizeof(rvcn_dec_message_decode_t); - - memset(dec->msg, 0, sizes); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizes; - header->num_buffers = 2; - header->msg_type = RDECODE_MSG_DECODE; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = dec->frame_number; - - header->index[0].message_id = RDECODE_MESSAGE_DECODE; - header->index[0].offset = offset_decode; - header->index[0].size = sizeof(rvcn_dec_message_decode_t); - header->index[0].filled = 0; - - index->offset = offset_codec; - index->size = sizeof(rvcn_dec_message_avc_t); - index->filled = 0; - - decode->stream_type = dec->stream_type; - decode->decode_flags = 0x1; - decode->width_in_samples = dec->base.width; - decode->height_in_samples = dec->base.height; - - decode->bsd_size = align(dec->bs_size, 128); - decode->dpb_size = dec->dpb.res->buf->size; - decode->dt_size = - si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + - si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; - - decode->sct_size = 0; - decode->sc_coeff_size = 0; - - decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE; - decode->db_pitch = (((struct si_screen*)dec->screen)->info.family >= CHIP_RENOIR && - dec->base.width > 32 && dec->stream_type == RDECODE_CODEC_VP9) ? - align(dec->base.width, 64) : - align(dec->base.width, 32) ; - decode->db_surf_tile_config = 0; - - decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; - decode->dt_uv_pitch = decode->dt_pitch / 2; - - decode->dt_tiling_mode = 0; - decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR; - decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR; - decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced; - decode->dt_surf_tile_config = 0; - decode->dt_uv_surf_tile_config = 0; - - decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; - decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; - if (decode->dt_field_mode) { - decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + - luma->surface.u.gfx9.surf_slice_size; - decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + - chroma->surface.u.gfx9.surf_slice_size; - } else { - decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; - decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; - } - - switch (u_reduce_video_profile(picture->profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - rvcn_dec_message_avc_t avc = - get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); - memcpy(codec, (void*)&avc, sizeof(rvcn_dec_message_avc_t)); - index->message_id = RDECODE_MESSAGE_AVC; - break; - } - case PIPE_VIDEO_FORMAT_HEVC: { - rvcn_dec_message_hevc_t hevc = - get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); - - memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t)); - index->message_id = RDECODE_MESSAGE_HEVC; - if (dec->ctx.res == NULL) { - unsigned ctx_size; - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - ctx_size = calc_ctx_size_h265_main10(dec, - (struct pipe_h265_picture_desc*)picture); - else - ctx_size = calc_ctx_size_h265_main(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) - RVID_ERR("Can't allocated context buffer.\n"); - si_vid_clear_buffer(dec->base.context, &dec->ctx); - } - break; - } - case PIPE_VIDEO_FORMAT_VC1: { - rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); - - memcpy(codec, (void*)&vc1, sizeof(rvcn_dec_message_vc1_t)); - if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || - (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { - decode->width_in_samples = align(decode->width_in_samples, 16) / 16; - decode->height_in_samples = align(decode->height_in_samples, 16) / 16; - } - index->message_id = RDECODE_MESSAGE_VC1; - break; - - } - case PIPE_VIDEO_FORMAT_MPEG12: { - rvcn_dec_message_mpeg2_vld_t mpeg2 = - get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); - - memcpy(codec, (void*)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t)); - index->message_id = RDECODE_MESSAGE_MPEG2_VLD; - break; - } - case PIPE_VIDEO_FORMAT_MPEG4: { - rvcn_dec_message_mpeg4_asp_vld_t mpeg4 = - get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); - - memcpy(codec, (void*)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t)); - index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD; - break; - } - case PIPE_VIDEO_FORMAT_VP9: { - rvcn_dec_message_vp9_t vp9 = - get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc*)picture); - - memcpy(codec, (void*)&vp9, sizeof(rvcn_dec_message_vp9_t)); - index->message_id = RDECODE_MESSAGE_VP9; - - if (dec->ctx.res == NULL) { - unsigned ctx_size; - uint8_t *ptr; - - /* default probability + probability data */ - ctx_size = 2304 * 5; - - if (((struct si_screen*)dec->screen)->info.family >= CHIP_RENOIR) { - /* SRE collocated context data */ - ctx_size += 32 * 2 * 128 * 68; - /* SMP collocated context data */ - ctx_size += 9 * 64 * 2 * 128 * 68; - /* SDB left tile pixel */ - ctx_size += 8 * 2 * 2 * 8192; - } else { - ctx_size += 32 * 2 * 64 * 64; - ctx_size += 9 * 64 * 2 * 64 * 64; - ctx_size += 8 * 2 * 4096; - } - - if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) - ctx_size += 8 * 2 * 4096; - - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) - RVID_ERR("Can't allocated context buffer.\n"); - si_vid_clear_buffer(dec->base.context, &dec->ctx); - - /* ctx needs probs table */ - ptr = dec->ws->buffer_map( - dec->ctx.res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - fill_probs_table(ptr); - dec->ws->buffer_unmap(dec->ctx.res->buf); - } - break; - } - default: - assert(0); - return NULL; - } - - if (dec->ctx.res) - decode->hw_ctxt_size = dec->ctx.res->buf->size; - - return luma->buffer.buf; + struct si_texture *luma = (struct si_texture *)((struct vl_video_buffer *)target)->resources[0]; + struct si_texture *chroma = + (struct si_texture *)((struct vl_video_buffer *)target)->resources[1]; + rvcn_dec_message_header_t *header; + rvcn_dec_message_index_t *index; + rvcn_dec_message_decode_t *decode; + unsigned sizes = 0, offset_decode, offset_codec; + void *codec; + + header = dec->msg; + sizes += sizeof(rvcn_dec_message_header_t); + index = (void *)header + sizeof(rvcn_dec_message_header_t); + sizes += sizeof(rvcn_dec_message_index_t); + offset_decode = sizes; + decode = (void *)index + sizeof(rvcn_dec_message_index_t); + sizes += sizeof(rvcn_dec_message_decode_t); + offset_codec = sizes; + codec = (void *)decode + sizeof(rvcn_dec_message_decode_t); + + memset(dec->msg, 0, sizes); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizes; + header->num_buffers = 2; + header->msg_type = RDECODE_MSG_DECODE; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = dec->frame_number; + + header->index[0].message_id = RDECODE_MESSAGE_DECODE; + header->index[0].offset = offset_decode; + header->index[0].size = sizeof(rvcn_dec_message_decode_t); + header->index[0].filled = 0; + + index->offset = offset_codec; + index->size = sizeof(rvcn_dec_message_avc_t); + index->filled = 0; + + decode->stream_type = dec->stream_type; + decode->decode_flags = 0x1; + decode->width_in_samples = dec->base.width; + decode->height_in_samples = dec->base.height; + + decode->bsd_size = align(dec->bs_size, 128); + decode->dpb_size = dec->dpb.res->buf->size; + decode->dt_size = si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + + si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; + + decode->sct_size = 0; + decode->sc_coeff_size = 0; + + decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE; + decode->db_pitch = (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR && + dec->base.width > 32 && dec->stream_type == RDECODE_CODEC_VP9) + ? align(dec->base.width, 64) + : align(dec->base.width, 32); + decode->db_surf_tile_config = 0; + + decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; + decode->dt_uv_pitch = decode->dt_pitch / 2; + + decode->dt_tiling_mode = 0; + decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR; + decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR; + decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced; + decode->dt_surf_tile_config = 0; + decode->dt_uv_surf_tile_config = 0; + + decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; + decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; + if (decode->dt_field_mode) { + decode->dt_luma_bottom_offset = + luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size; + decode->dt_chroma_bottom_offset = + chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size; + } else { + decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; + decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; + } + + switch (u_reduce_video_profile(picture->profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + rvcn_dec_message_avc_t avc = get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture); + memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t)); + index->message_id = RDECODE_MESSAGE_AVC; + break; + } + case PIPE_VIDEO_FORMAT_HEVC: { + rvcn_dec_message_hevc_t hevc = + get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture); + + memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t)); + index->message_id = RDECODE_MESSAGE_HEVC; + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + si_vid_clear_buffer(dec->base.context, &dec->ctx); + } + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture); + + memcpy(codec, (void *)&vc1, sizeof(rvcn_dec_message_vc1_t)); + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + decode->width_in_samples = align(decode->width_in_samples, 16) / 16; + decode->height_in_samples = align(decode->height_in_samples, 16) / 16; + } + index->message_id = RDECODE_MESSAGE_VC1; + break; + } + case PIPE_VIDEO_FORMAT_MPEG12: { + rvcn_dec_message_mpeg2_vld_t mpeg2 = + get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture); + + memcpy(codec, (void *)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t)); + index->message_id = RDECODE_MESSAGE_MPEG2_VLD; + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + rvcn_dec_message_mpeg4_asp_vld_t mpeg4 = + get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture); + + memcpy(codec, (void *)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t)); + index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD; + break; + } + case PIPE_VIDEO_FORMAT_VP9: { + rvcn_dec_message_vp9_t vp9 = + get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc *)picture); + + memcpy(codec, (void *)&vp9, sizeof(rvcn_dec_message_vp9_t)); + index->message_id = RDECODE_MESSAGE_VP9; + + if (dec->ctx.res == NULL) { + unsigned ctx_size; + uint8_t *ptr; + + /* default probability + probability data */ + ctx_size = 2304 * 5; + + if (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR) { + /* SRE collocated context data */ + ctx_size += 32 * 2 * 128 * 68; + /* SMP collocated context data */ + ctx_size += 9 * 64 * 2 * 128 * 68; + /* SDB left tile pixel */ + ctx_size += 8 * 2 * 2 * 8192; + } else { + ctx_size += 32 * 2 * 64 * 64; + ctx_size += 9 * 64 * 2 * 64 * 64; + ctx_size += 8 * 2 * 4096; + } + + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + ctx_size += 8 * 2 * 4096; + + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + si_vid_clear_buffer(dec->base.context, &dec->ctx); + + /* ctx needs probs table */ + ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + fill_probs_table(ptr); + dec->ws->buffer_unmap(dec->ctx.res->buf); + } + break; + } + default: + assert(0); + return NULL; + } + + if (dec->ctx.res) + decode->hw_ctxt_size = dec->ctx.res->buf->size; + + return luma->buffer.buf; } static void rvcn_dec_message_destroy(struct radeon_decoder *dec) { - rvcn_dec_message_header_t *header = dec->msg; - - memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t)); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizeof(rvcn_dec_message_header_t) - - sizeof(rvcn_dec_message_index_t); - header->num_buffers = 0; - header->msg_type = RDECODE_MSG_DESTROY; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = 0; + rvcn_dec_message_header_t *header = dec->msg; + + memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t)); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizeof(rvcn_dec_message_header_t) - sizeof(rvcn_dec_message_index_t); + header->num_buffers = 0; + header->msg_type = RDECODE_MSG_DESTROY; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = 0; } static void rvcn_dec_message_feedback(struct radeon_decoder *dec) { - rvcn_dec_feedback_header_t *header = (void*)dec->fb; + rvcn_dec_feedback_header_t *header = (void *)dec->fb; - header->header_size = sizeof(rvcn_dec_feedback_header_t); - header->total_size = sizeof(rvcn_dec_feedback_header_t); - header->num_buffers = 0; + header->header_size = sizeof(rvcn_dec_feedback_header_t); + header->total_size = sizeof(rvcn_dec_feedback_header_t); + header->num_buffers = 0; } /* flush IB to the hardware */ static int flush(struct radeon_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ -static void send_cmd(struct radeon_decoder *dec, unsigned cmd, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - uint64_t addr; + uint64_t addr; - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; - set_reg(dec, dec->reg.data0, addr); - set_reg(dec, dec->reg.data1, addr >> 32); - set_reg(dec, dec->reg.cmd, cmd << 1); + set_reg(dec, dec->reg.data0, addr); + set_reg(dec, dec->reg.data1, addr >> 32); + set_reg(dec, dec->reg.cmd, cmd << 1); } /* do the codec needs an IT buffer ?*/ static bool have_it(struct radeon_decoder *dec) { - return dec->stream_type == RDECODE_CODEC_H264_PERF || - dec->stream_type == RDECODE_CODEC_H265; + return dec->stream_type == RDECODE_CODEC_H264_PERF || dec->stream_type == RDECODE_CODEC_H265; } /* do the codec needs an probs buffer? */ static bool have_probs(struct radeon_decoder *dec) { - return dec->stream_type == RDECODE_CODEC_VP9; + return dec->stream_type == RDECODE_CODEC_VP9; } /* map the next available message/feedback/itscaling buffer */ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec) { - struct rvid_buffer* buf; - uint8_t *ptr; + struct rvid_buffer *buf; + uint8_t *ptr; - /* grab the current message/feedback buffer */ - buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + /* and map it for CPU access */ + ptr = + dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - /* calc buffer offsets */ - dec->msg = ptr; + /* calc buffer offsets */ + dec->msg = ptr; - dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); - if (have_it(dec)) - dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); - else if (have_probs(dec)) - dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); + else if (have_probs(dec)) + dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); } /* unmap and send a message command to the VCPU */ static void send_msg_buf(struct radeon_decoder *dec) { - struct rvid_buffer* buf; - - /* ignore the request if message/feedback buffer isn't mapped */ - if (!dec->msg || !dec->fb) - return; - - /* grab the current message buffer */ - buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - - /* unmap the buffer */ - dec->ws->buffer_unmap(buf->res->buf); - dec->msg = NULL; - dec->fb = NULL; - dec->it = NULL; - dec->probs = NULL; - - if (dec->sessionctx.res) - send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, - dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, - RADEON_DOMAIN_VRAM); - - /* and send it to the hardware */ - send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + struct rvid_buffer *buf; + + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; + + /* grab the current message buffer */ + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; + dec->probs = NULL; + + if (dec->sessionctx.res) + send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + + /* and send it to the hardware */ + send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); } /* cycle to the next set of buffers */ static void next_buffer(struct radeon_decoder *dec) { - ++dec->cur_buffer; - dec->cur_buffer %= NUM_BUFFERS; + ++dec->cur_buffer; + dec->cur_buffer %= NUM_BUFFERS; } static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec) { - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - unsigned max_references = dec->base.max_references + 1; - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); - - return ctx_size; + unsigned width_in_mb, height_in_mb, ctx_size; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); + + return ctx_size; } /* calculate size of reference picture buffer */ static unsigned calc_dpb_size(struct radeon_decoder *dec) { - unsigned width_in_mb, height_in_mb, image_size, dpb_size; - - // always align them to MB size for dpb calculation - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - // always one more for currently decoded picture - unsigned max_references = dec->base.max_references + 1; - - // aligned size of a single frame - image_size = align(width, 32) * height; - image_size += image_size / 2; - image_size = align(image_size, 1024); - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - dpb_size = image_size * max_references; - break; - } - - case PIPE_VIDEO_FORMAT_HEVC: - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); - - width = align (width, 16); - height = align (height, 16); - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references; - else - dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references; - break; - - case PIPE_VIDEO_FORMAT_VC1: - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_VC1_REFS, max_references); - - // reference picture buffer - dpb_size = image_size * max_references; - - // CONTEXT_BUFFER - dpb_size += width_in_mb * height_in_mb * 128; - - // IT surface buffer - dpb_size += width_in_mb * 64; - - // DB surface buffer - dpb_size += width_in_mb * 128; - - // BP - dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - // reference picture buffer, must be big enough for all frames - dpb_size = image_size * NUM_MPEG2_REFS; - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - // reference picture buffer - dpb_size = image_size * max_references; - - // CM - dpb_size += width_in_mb * height_in_mb * 64; - - // IT surface buffer - dpb_size += align(width_in_mb * height_in_mb * 32, 64); - - dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); - break; - - case PIPE_VIDEO_FORMAT_VP9: - max_references = MAX2(max_references, 9); - - dpb_size = (((struct si_screen*)dec->screen)->info.family >= CHIP_RENOIR) ? - (8192 * 4320 * 3 / 2) * max_references : - (4096 * 3000 * 3 / 2) * max_references; - - if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) - dpb_size *= (3 / 2); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - dpb_size = 0; - break; - - default: - // something is missing here - assert(0); - - // at least use a sane default value - dpb_size = 32 * 1024 * 1024; - break; - } - return dpb_size; + unsigned width_in_mb, height_in_mb, image_size, dpb_size; + + // always align them to MB size for dpb calculation + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + // always one more for currently decoded picture + unsigned max_references = dec->base.max_references + 1; + + // aligned size of a single frame + image_size = align(width, 32) * height; + image_size += image_size / 2; + image_size = align(image_size, 1024); + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + break; + } + + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align(width, 16); + height = align(height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references; + else + dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references; + break; + + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + + // reference picture buffer + dpb_size = image_size * max_references; + + // CONTEXT_BUFFER + dpb_size += width_in_mb * height_in_mb * 128; + + // IT surface buffer + dpb_size += width_in_mb * 64; + + // DB surface buffer + dpb_size += width_in_mb * 128; + + // BP + dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + // reference picture buffer, must be big enough for all frames + dpb_size = image_size * NUM_MPEG2_REFS; + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + // reference picture buffer + dpb_size = image_size * max_references; + + // CM + dpb_size += width_in_mb * height_in_mb * 64; + + // IT surface buffer + dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); + break; + + case PIPE_VIDEO_FORMAT_VP9: + max_references = MAX2(max_references, 9); + + dpb_size = (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR) + ? (8192 * 4320 * 3 / 2) * max_references + : (4096 * 3000 * 3 / 2) * max_references; + + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + dpb_size *= (3 / 2); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + + default: + // something is missing here + assert(0); + + // at least use a sane default value + dpb_size = 32 * 1024 * 1024; + break; + } + return dpb_size; } /** @@ -1293,169 +1261,162 @@ static unsigned calc_dpb_size(struct radeon_decoder *dec) */ static void radeon_dec_destroy(struct pipe_video_codec *decoder) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - unsigned i; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + unsigned i; - assert(decoder); + assert(decoder); - map_msg_fb_it_probs_buf(dec); - rvcn_dec_message_destroy(dec); - send_msg_buf(dec); + map_msg_fb_it_probs_buf(dec); + rvcn_dec_message_destroy(dec); + send_msg_buf(dec); - flush(dec, 0); + flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); } /** * start decoding of a new frame */ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - uintptr_t frame; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + uintptr_t frame; - assert(decoder); + assert(decoder); - frame = ++dec->frame_number; - if (dec->stream_type != RDECODE_CODEC_VP9) - vl_video_buffer_set_associated_data(target, decoder, (void *)frame, - &radeon_dec_destroy_associated_data); + frame = ++dec->frame_number; + if (dec->stream_type != RDECODE_CODEC_VP9) + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + &radeon_dec_destroy_associated_data); - dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map( - dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + dec->bs_size = 0; + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** * decode a macroblock */ static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - const struct pipe_macroblock *macroblocks, - unsigned num_macroblocks) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) { - /* not supported (yet) */ - assert(0); + /* not supported (yet) */ + assert(0); } /** * decode a bitstream */ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - unsigned num_buffers, - const void * const *buffers, - const unsigned *sizes) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, unsigned num_buffers, + const void *const *buffers, const unsigned *sizes) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - unsigned i; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - for (i = 0; i < num_buffers; ++i) { - struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; - unsigned new_size = dec->bs_size + sizes[i]; - - if (new_size > buf->res->buf->size) { - dec->ws->buffer_unmap(buf->res->buf); - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { - RVID_ERR("Can't resize bitstream buffer!"); - return; - } - - dec->bs_ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dec->bs_ptr) - return; - - dec->bs_ptr += dec->bs_size; - } - - memcpy(dec->bs_ptr, buffers[i], sizes[i]); - dec->bs_size += sizes[i]; - dec->bs_ptr += sizes[i]; - } + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + unsigned i; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + for (i = 0; i < num_buffers; ++i) { + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + unsigned new_size = dec->bs_size + sizes[i]; + + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); + return; + } + + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dec->bs_ptr) + return; + + dec->bs_ptr += dec->bs_size; + } + + memcpy(dec->bs_ptr, buffers[i], sizes[i]); + dec->bs_size += sizes[i]; + dec->bs_ptr += sizes[i]; + } } /** * send cmd for vcn dec */ -void send_cmd_dec(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct pb_buffer *dt; - struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf; - - msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - bs_buf = &dec->bs_buffers[dec->cur_buffer]; - - memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->res->buf); - - map_msg_fb_it_probs_buf(dec); - dt = rvcn_dec_message_decode(dec, target, picture); - rvcn_dec_message_feedback(dec); - send_msg_buf(dec); - - send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - if (dec->ctx.res) - send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); - if (have_it(dec)) - send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - else if (have_probs(dec)) - send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - set_reg(dec, dec->reg.cntl, 1); + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf; + + msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_probs_buf(dec); + dt = rvcn_dec_message_decode(dec, target, picture); + rvcn_dec_message_feedback(dec); + send_msg_buf(dec); + + send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + if (dec->ctx.res) + send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ, + RADEON_DOMAIN_GTT); + send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET, + RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf, + FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + else if (have_probs(dec)) + send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf, + FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + set_reg(dec, dec->reg.cntl, 1); } /** * end decoding of the current frame */ -static void radeon_dec_end_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; - assert(decoder); + assert(decoder); - if (!dec->bs_ptr) - return; + if (!dec->bs_ptr) + return; - dec->send_cmd(dec, target, picture); - flush(dec, PIPE_FLUSH_ASYNC); - next_buffer(dec); + dec->send_cmd(dec, target, picture); + flush(dec, PIPE_FLUSH_ASYNC); + next_buffer(dec); } /** @@ -1469,190 +1430,189 @@ static void radeon_dec_flush(struct pipe_video_codec *decoder) * create and HW decoder */ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ) + const struct pipe_video_codec *templ) { - struct si_context *sctx = (struct si_context*)context; - struct radeon_winsys *ws = sctx->ws; - unsigned width = templ->width, height = templ->height; - unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC; - struct radeon_decoder *dec; - int r, i; - - switch(u_reduce_video_profile(templ->profile)) { - case PIPE_VIDEO_FORMAT_MPEG12: - if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - return vl_create_mpeg12_decoder(context, templ); - stream_type = RDECODE_CODEC_MPEG2_VLD; - break; - case PIPE_VIDEO_FORMAT_MPEG4: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - stream_type = RDECODE_CODEC_MPEG4; - break; - case PIPE_VIDEO_FORMAT_VC1: - stream_type = RDECODE_CODEC_VC1; - break; - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - stream_type = RDECODE_CODEC_H264_PERF; - break; - case PIPE_VIDEO_FORMAT_HEVC: - stream_type = RDECODE_CODEC_H265; - break; - case PIPE_VIDEO_FORMAT_VP9: - stream_type = RDECODE_CODEC_VP9; - break; - case PIPE_VIDEO_FORMAT_JPEG: - stream_type = RDECODE_CODEC_JPEG; - ring = RING_VCN_JPEG; - break; - default: - assert(0); - break; - } - - dec = CALLOC_STRUCT(radeon_decoder); - - if (!dec) - return NULL; - - dec->base = *templ; - dec->base.context = context; - dec->base.width = width; - dec->base.height = height; - - dec->base.destroy = radeon_dec_destroy; - dec->base.begin_frame = radeon_dec_begin_frame; - dec->base.decode_macroblock = radeon_dec_decode_macroblock; - dec->base.decode_bitstream = radeon_dec_decode_bitstream; - dec->base.end_frame = radeon_dec_end_frame; - dec->base.flush = radeon_dec_flush; - - dec->stream_type = stream_type; - dec->stream_handle = si_vid_alloc_stream_handle(); - dec->screen = context->screen; - dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); - if (!dec->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - for (i = 0; i < 16; i++) - dec->render_pic_list[i] = NULL; - bs_buf_size = width * height * (512 / (16 * 16)); - for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; - if (have_it(dec)) - msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE; - else if (have_probs(dec)) - msg_fb_it_probs_size += VP9_PROBS_TABLE_SIZE; - /* use vram to improve performance, workaround an unknown bug */ - if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], - msg_fb_it_probs_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated message buffers.\n"); - goto error; - } - - if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], - bs_buf_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated bitstream buffers.\n"); - goto error; - } - - si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]); - si_vid_clear_buffer(context, &dec->bs_buffers[i]); - - if (have_probs(dec)) { - struct rvid_buffer* buf; - void *ptr; - - buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; - fill_probs_table(ptr); - dec->ws->buffer_unmap(buf->res->buf); - } - } - - dpb_size = calc_dpb_size(dec); - if (dpb_size) { - if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated dpb.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->dpb); - } - - if (dec->stream_type == RDECODE_CODEC_H264_PERF) { - unsigned ctx_size = calc_ctx_size_h264_perf(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->ctx); - } - - if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, - RDECODE_SESSION_CONTEXT_SIZE, - PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated session ctx.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->sessionctx); - - if (sctx->family == CHIP_ARCTURUS) { - dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL; - dec->jpg.direct_reg = true; - } else if (sctx->family >= CHIP_NAVI10 || sctx->family == CHIP_RENOIR) { - dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL; - dec->jpg.direct_reg = true; - } else { - dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL; - dec->jpg.direct_reg = false; - } - - map_msg_fb_it_probs_buf(dec); - rvcn_dec_message_create(dec); - send_msg_buf(dec); - r = flush(dec, 0); - if (r) - goto error; - - next_buffer(dec); - - if (stream_type == RDECODE_CODEC_JPEG) - dec->send_cmd = send_cmd_jpeg; - else - dec->send_cmd = send_cmd_dec; - - return &dec->base; + struct si_context *sctx = (struct si_context *)context; + struct radeon_winsys *ws = sctx->ws; + unsigned width = templ->width, height = templ->height; + unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC; + struct radeon_decoder *dec; + int r, i; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + return vl_create_mpeg12_decoder(context, templ); + stream_type = RDECODE_CODEC_MPEG2_VLD; + break; + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + stream_type = RDECODE_CODEC_MPEG4; + break; + case PIPE_VIDEO_FORMAT_VC1: + stream_type = RDECODE_CODEC_VC1; + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + stream_type = RDECODE_CODEC_H264_PERF; + break; + case PIPE_VIDEO_FORMAT_HEVC: + stream_type = RDECODE_CODEC_H265; + break; + case PIPE_VIDEO_FORMAT_VP9: + stream_type = RDECODE_CODEC_VP9; + break; + case PIPE_VIDEO_FORMAT_JPEG: + stream_type = RDECODE_CODEC_JPEG; + ring = RING_VCN_JPEG; + break; + default: + assert(0); + break; + } + + dec = CALLOC_STRUCT(radeon_decoder); + + if (!dec) + return NULL; + + dec->base = *templ; + dec->base.context = context; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = radeon_dec_destroy; + dec->base.begin_frame = radeon_dec_begin_frame; + dec->base.decode_macroblock = radeon_dec_decode_macroblock; + dec->base.decode_bitstream = radeon_dec_decode_bitstream; + dec->base.end_frame = radeon_dec_end_frame; + dec->base.flush = radeon_dec_flush; + + dec->stream_type = stream_type; + dec->stream_handle = si_vid_alloc_stream_handle(); + dec->screen = context->screen; + dec->ws = ws; + dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); + if (!dec->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + for (i = 0; i < 16; i++) + dec->render_pic_list[i] = NULL; + bs_buf_size = width * height * (512 / (16 * 16)); + for (i = 0; i < NUM_BUFFERS; ++i) { + unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + if (have_it(dec)) + msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE; + else if (have_probs(dec)) + msg_fb_it_probs_size += VP9_PROBS_TABLE_SIZE; + /* use vram to improve performance, workaround an unknown bug */ + if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], msg_fb_it_probs_size, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated message buffers.\n"); + goto error; + } + + if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); + goto error; + } + + si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]); + si_vid_clear_buffer(context, &dec->bs_buffers[i]); + + if (have_probs(dec)) { + struct rvid_buffer *buf; + void *ptr; + + buf = &dec->msg_fb_it_probs_buffers[i]; + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + fill_probs_table(ptr); + dec->ws->buffer_unmap(buf->res->buf); + } + } + + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->dpb); + } + + if (dec->stream_type == RDECODE_CODEC_H264_PERF) { + unsigned ctx_size = calc_ctx_size_h264_perf(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->ctx); + } + + if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, RDECODE_SESSION_CONTEXT_SIZE, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated session ctx.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->sessionctx); + + if (sctx->family == CHIP_ARCTURUS) { + dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL; + dec->jpg.direct_reg = true; + } else if (sctx->family >= CHIP_NAVI10 || sctx->family == CHIP_RENOIR) { + dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL; + dec->jpg.direct_reg = true; + } else { + dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL; + dec->jpg.direct_reg = false; + } + + map_msg_fb_it_probs_buf(dec); + rvcn_dec_message_create(dec); + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + + next_buffer(dec); + + if (stream_type == RDECODE_CODEC_JPEG) + dec->send_cmd = send_cmd_jpeg; + else + dec->send_cmd = send_cmd_dec; + + return &dec->base; error: - if (dec->cs) dec->ws->cs_destroy(dec->cs); + if (dec->cs) + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); - return NULL; + return NULL; } diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.h b/src/gallium/drivers/radeon/radeon_vcn_dec.h index 7cd358a6129..5a080fdb44c 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.h +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.h @@ -30,783 +30,778 @@ #include "radeon_video.h" -#define RDECODE_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) -#define RDECODE_PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define RDECODE_PKT_TYPE_C 0x3FFFFFFF -#define RDECODE_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) -#define RDECODE_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define RDECODE_PKT_COUNT_C 0xC000FFFF -#define RDECODE_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) -#define RDECODE_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define RDECODE_PKT0_BASE_INDEX_C 0xFFFF0000 -#define RDECODE_PKT0(index, count) (RDECODE_PKT_TYPE_S(0) | \ - RDECODE_PKT0_BASE_INDEX_S(index) | \ - RDECODE_PKT_COUNT_S(count)) - -#define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2)) - -#define RDECODE_PKT_REG_J(x) ((unsigned)(x) & 0x3FFFF) -#define RDECODE_PKT_RES_J(x) (((unsigned)(x) & 0x3F) << 18) -#define RDECODE_PKT_COND_J(x) (((unsigned)(x) & 0xF) << 24) -#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x) & 0xF) << 28) -#define RDECODE_PKTJ(reg, cond, type) (RDECODE_PKT_REG_J(reg) | \ - RDECODE_PKT_RES_J(0) | \ - RDECODE_PKT_COND_J(cond) | \ - RDECODE_PKT_TYPE_J(type)) - -#define RDECODE_CMD_MSG_BUFFER 0x00000000 -#define RDECODE_CMD_DPB_BUFFER 0x00000001 -#define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 -#define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003 -#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004 -#define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 -#define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100 -#define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 -#define RDECODE_CMD_CONTEXT_BUFFER 0x00000206 - -#define RDECODE_MSG_CREATE 0x00000000 -#define RDECODE_MSG_DECODE 0x00000001 -#define RDECODE_MSG_DESTROY 0x00000002 - -#define RDECODE_CODEC_H264 0x00000000 -#define RDECODE_CODEC_VC1 0x00000001 -#define RDECODE_CODEC_MPEG2_VLD 0x00000003 -#define RDECODE_CODEC_MPEG4 0x00000004 -#define RDECODE_CODEC_H264_PERF 0x00000007 -#define RDECODE_CODEC_JPEG 0x00000008 -#define RDECODE_CODEC_H265 0x00000010 -#define RDECODE_CODEC_VP9 0x00000011 - -#define RDECODE_ARRAY_MODE_LINEAR 0x00000000 -#define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 -#define RDECODE_ARRAY_MODE_1D_THIN 0x00000002 -#define RDECODE_ARRAY_MODE_2D_THIN 0x00000004 -#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 -#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 - -#define RDECODE_H264_PROFILE_BASELINE 0x00000000 -#define RDECODE_H264_PROFILE_MAIN 0x00000001 -#define RDECODE_H264_PROFILE_HIGH 0x00000002 -#define RDECODE_H264_PROFILE_STEREO_HIGH 0x00000003 -#define RDECODE_H264_PROFILE_MVC 0x00000004 - -#define RDECODE_VC1_PROFILE_SIMPLE 0x00000000 -#define RDECODE_VC1_PROFILE_MAIN 0x00000001 -#define RDECODE_VC1_PROFILE_ADVANCED 0x00000002 - -#define RDECODE_SW_MODE_LINEAR 0x00000000 -#define RDECODE_256B_S 0x00000001 -#define RDECODE_256B_D 0x00000002 -#define RDECODE_4KB_S 0x00000005 -#define RDECODE_4KB_D 0x00000006 -#define RDECODE_64KB_S 0x00000009 -#define RDECODE_64KB_D 0x0000000A -#define RDECODE_4KB_S_X 0x00000015 -#define RDECODE_4KB_D_X 0x00000016 -#define RDECODE_64KB_S_X 0x00000019 -#define RDECODE_64KB_D_X 0x0000001A - -#define RDECODE_MESSAGE_NOT_SUPPORTED 0x00000000 -#define RDECODE_MESSAGE_CREATE 0x00000001 -#define RDECODE_MESSAGE_DECODE 0x00000002 -#define RDECODE_MESSAGE_AVC 0x00000006 -#define RDECODE_MESSAGE_VC1 0x00000007 -#define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A -#define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B -#define RDECODE_MESSAGE_HEVC 0x0000000D -#define RDECODE_MESSAGE_VP9 0x0000000E - -#define RDECODE_FEEDBACK_PROFILING 0x00000001 - -#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7 - -#define NUM_BUFFERS 4 - -#define RDECODE_VP9_PROBS_DATA_SIZE 2304 - -#define mmUVD_JPEG_CNTL 0x0200 -#define mmUVD_JPEG_CNTL_BASE_IDX 1 -#define mmUVD_JPEG_RB_BASE 0x0201 -#define mmUVD_JPEG_RB_BASE_BASE_IDX 1 -#define mmUVD_JPEG_RB_WPTR 0x0202 -#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1 -#define mmUVD_JPEG_RB_RPTR 0x0203 -#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1 -#define mmUVD_JPEG_RB_SIZE 0x0204 -#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 -#define mmUVD_JPEG_TIER_CNTL2 0x021a -#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1 -#define mmUVD_JPEG_UV_TILING_CTRL 0x021c -#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1 -#define mmUVD_JPEG_TILING_CTRL 0x021e -#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1 -#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 -#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1 -#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 -#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1 -#define mmUVD_JPEG_PITCH 0x0222 -#define mmUVD_JPEG_PITCH_BASE_IDX 1 -#define mmUVD_JPEG_INT_EN 0x0229 -#define mmUVD_JPEG_INT_EN_BASE_IDX 1 -#define mmUVD_JPEG_UV_PITCH 0x022b -#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1 -#define mmUVD_JPEG_INDEX 0x023e -#define mmUVD_JPEG_INDEX_BASE_IDX 1 -#define mmUVD_JPEG_DATA 0x023f -#define mmUVD_JPEG_DATA_BASE_IDX 1 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1 -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1 -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1 -#define mmUVD_CTX_INDEX 0x0528 -#define mmUVD_CTX_INDEX_BASE_IDX 1 -#define mmUVD_CTX_DATA 0x0529 -#define mmUVD_CTX_DATA_BASE_IDX 1 -#define mmUVD_SOFT_RESET 0x05a0 -#define mmUVD_SOFT_RESET_BASE_IDX 1 - -#define vcnipUVD_JPEG_DEC_SOFT_RST 0x402f -#define vcnipUVD_JRBC_IB_COND_RD_TIMER 0x408e -#define vcnipUVD_JRBC_IB_REF_DATA 0x408f -#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x40e1 -#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x40e0 -#define vcnipUVD_JPEG_RB_BASE 0x4001 -#define vcnipUVD_JPEG_RB_SIZE 0x4004 -#define vcnipUVD_JPEG_RB_WPTR 0x4002 -#define vcnipUVD_JPEG_PITCH 0x401f -#define vcnipUVD_JPEG_UV_PITCH 0x4020 -#define vcnipJPEG_DEC_ADDR_MODE 0x4027 -#define vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE 0x4024 -#define vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE 0x4025 -#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x40e3 -#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x40e2 -#define vcnipUVD_JPEG_INDEX 0x402c -#define vcnipUVD_JPEG_DATA 0x402d -#define vcnipUVD_JPEG_TIER_CNTL2 0x400f -#define vcnipUVD_JPEG_OUTBUF_RPTR 0x401e -#define vcnipUVD_JPEG_OUTBUF_CNTL 0x401c -#define vcnipUVD_JPEG_INT_EN 0x400a -#define vcnipUVD_JPEG_CNTL 0x4000 -#define vcnipUVD_JPEG_RB_RPTR 0x4003 -#define vcnipUVD_JPEG_OUTBUF_WPTR 0x401d - -#define UVD_BASE_INST0_SEG0 0x00007800 -#define UVD_BASE_INST0_SEG1 0x00007E00 -#define UVD_BASE_INST0_SEG2 0 -#define UVD_BASE_INST0_SEG3 0 -#define UVD_BASE_INST0_SEG4 0 - -#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + reg) - -#define COND0 0 -#define COND1 1 -#define COND2 2 -#define COND3 3 -#define COND4 4 -#define COND5 5 -#define COND6 6 -#define COND7 7 - -#define TYPE0 0 -#define TYPE1 1 -#define TYPE2 2 -#define TYPE3 3 -#define TYPE4 4 -#define TYPE5 5 -#define TYPE6 6 -#define TYPE7 7 +#define RDECODE_PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) +#define RDECODE_PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define RDECODE_PKT_TYPE_C 0x3FFFFFFF +#define RDECODE_PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) +#define RDECODE_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define RDECODE_PKT_COUNT_C 0xC000FFFF +#define RDECODE_PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0) +#define RDECODE_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define RDECODE_PKT0_BASE_INDEX_C 0xFFFF0000 +#define RDECODE_PKT0(index, count) \ + (RDECODE_PKT_TYPE_S(0) | RDECODE_PKT0_BASE_INDEX_S(index) | RDECODE_PKT_COUNT_S(count)) + +#define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2)) + +#define RDECODE_PKT_REG_J(x) ((unsigned)(x)&0x3FFFF) +#define RDECODE_PKT_RES_J(x) (((unsigned)(x)&0x3F) << 18) +#define RDECODE_PKT_COND_J(x) (((unsigned)(x)&0xF) << 24) +#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x)&0xF) << 28) +#define RDECODE_PKTJ(reg, cond, type) \ + (RDECODE_PKT_REG_J(reg) | RDECODE_PKT_RES_J(0) | RDECODE_PKT_COND_J(cond) | \ + RDECODE_PKT_TYPE_J(type)) + +#define RDECODE_CMD_MSG_BUFFER 0x00000000 +#define RDECODE_CMD_DPB_BUFFER 0x00000001 +#define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003 +#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004 +#define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100 +#define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 +#define RDECODE_CMD_CONTEXT_BUFFER 0x00000206 + +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MSG_DECODE 0x00000001 +#define RDECODE_MSG_DESTROY 0x00000002 + +#define RDECODE_CODEC_H264 0x00000000 +#define RDECODE_CODEC_VC1 0x00000001 +#define RDECODE_CODEC_MPEG2_VLD 0x00000003 +#define RDECODE_CODEC_MPEG4 0x00000004 +#define RDECODE_CODEC_H264_PERF 0x00000007 +#define RDECODE_CODEC_JPEG 0x00000008 +#define RDECODE_CODEC_H265 0x00000010 +#define RDECODE_CODEC_VP9 0x00000011 + +#define RDECODE_ARRAY_MODE_LINEAR 0x00000000 +#define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 +#define RDECODE_ARRAY_MODE_1D_THIN 0x00000002 +#define RDECODE_ARRAY_MODE_2D_THIN 0x00000004 +#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 +#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 + +#define RDECODE_H264_PROFILE_BASELINE 0x00000000 +#define RDECODE_H264_PROFILE_MAIN 0x00000001 +#define RDECODE_H264_PROFILE_HIGH 0x00000002 +#define RDECODE_H264_PROFILE_STEREO_HIGH 0x00000003 +#define RDECODE_H264_PROFILE_MVC 0x00000004 + +#define RDECODE_VC1_PROFILE_SIMPLE 0x00000000 +#define RDECODE_VC1_PROFILE_MAIN 0x00000001 +#define RDECODE_VC1_PROFILE_ADVANCED 0x00000002 + +#define RDECODE_SW_MODE_LINEAR 0x00000000 +#define RDECODE_256B_S 0x00000001 +#define RDECODE_256B_D 0x00000002 +#define RDECODE_4KB_S 0x00000005 +#define RDECODE_4KB_D 0x00000006 +#define RDECODE_64KB_S 0x00000009 +#define RDECODE_64KB_D 0x0000000A +#define RDECODE_4KB_S_X 0x00000015 +#define RDECODE_4KB_D_X 0x00000016 +#define RDECODE_64KB_S_X 0x00000019 +#define RDECODE_64KB_D_X 0x0000001A + +#define RDECODE_MESSAGE_NOT_SUPPORTED 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 +#define RDECODE_MESSAGE_DECODE 0x00000002 +#define RDECODE_MESSAGE_AVC 0x00000006 +#define RDECODE_MESSAGE_VC1 0x00000007 +#define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A +#define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B +#define RDECODE_MESSAGE_HEVC 0x0000000D +#define RDECODE_MESSAGE_VP9 0x0000000E + +#define RDECODE_FEEDBACK_PROFILING 0x00000001 + +#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7 + +#define NUM_BUFFERS 4 + +#define RDECODE_VP9_PROBS_DATA_SIZE 2304 + +#define mmUVD_JPEG_CNTL 0x0200 +#define mmUVD_JPEG_CNTL_BASE_IDX 1 +#define mmUVD_JPEG_RB_BASE 0x0201 +#define mmUVD_JPEG_RB_BASE_BASE_IDX 1 +#define mmUVD_JPEG_RB_WPTR 0x0202 +#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_RPTR 0x0203 +#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_SIZE 0x0204 +#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 +#define mmUVD_JPEG_TIER_CNTL2 0x021a +#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1 +#define mmUVD_JPEG_UV_TILING_CTRL 0x021c +#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_TILING_CTRL 0x021e +#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 +#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 +#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_PITCH 0x0222 +#define mmUVD_JPEG_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INT_EN 0x0229 +#define mmUVD_JPEG_INT_EN_BASE_IDX 1 +#define mmUVD_JPEG_UV_PITCH 0x022b +#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INDEX 0x023e +#define mmUVD_JPEG_INDEX_BASE_IDX 1 +#define mmUVD_JPEG_DATA 0x023f +#define mmUVD_JPEG_DATA_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_CTX_INDEX 0x0528 +#define mmUVD_CTX_INDEX_BASE_IDX 1 +#define mmUVD_CTX_DATA 0x0529 +#define mmUVD_CTX_DATA_BASE_IDX 1 +#define mmUVD_SOFT_RESET 0x05a0 +#define mmUVD_SOFT_RESET_BASE_IDX 1 + +#define vcnipUVD_JPEG_DEC_SOFT_RST 0x402f +#define vcnipUVD_JRBC_IB_COND_RD_TIMER 0x408e +#define vcnipUVD_JRBC_IB_REF_DATA 0x408f +#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x40e1 +#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x40e0 +#define vcnipUVD_JPEG_RB_BASE 0x4001 +#define vcnipUVD_JPEG_RB_SIZE 0x4004 +#define vcnipUVD_JPEG_RB_WPTR 0x4002 +#define vcnipUVD_JPEG_PITCH 0x401f +#define vcnipUVD_JPEG_UV_PITCH 0x4020 +#define vcnipJPEG_DEC_ADDR_MODE 0x4027 +#define vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE 0x4024 +#define vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE 0x4025 +#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x40e3 +#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x40e2 +#define vcnipUVD_JPEG_INDEX 0x402c +#define vcnipUVD_JPEG_DATA 0x402d +#define vcnipUVD_JPEG_TIER_CNTL2 0x400f +#define vcnipUVD_JPEG_OUTBUF_RPTR 0x401e +#define vcnipUVD_JPEG_OUTBUF_CNTL 0x401c +#define vcnipUVD_JPEG_INT_EN 0x400a +#define vcnipUVD_JPEG_CNTL 0x4000 +#define vcnipUVD_JPEG_RB_RPTR 0x4003 +#define vcnipUVD_JPEG_OUTBUF_WPTR 0x401d + +#define UVD_BASE_INST0_SEG0 0x00007800 +#define UVD_BASE_INST0_SEG1 0x00007E00 +#define UVD_BASE_INST0_SEG2 0 +#define UVD_BASE_INST0_SEG3 0 +#define UVD_BASE_INST0_SEG4 0 + +#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + reg) + +#define COND0 0 +#define COND1 1 +#define COND2 2 +#define COND3 3 +#define COND4 4 +#define COND5 5 +#define COND6 6 +#define COND7 7 + +#define TYPE0 0 +#define TYPE1 1 +#define TYPE2 2 +#define TYPE3 3 +#define TYPE4 4 +#define TYPE5 5 +#define TYPE6 6 +#define TYPE7 7 /* VP9 Frame header flags */ -#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6) -#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5) -#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4) -#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3) -#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1) -#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0) - -#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040) -#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020) -#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010) -#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008) -#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002) -#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001) +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5) +#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0) + +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020) +#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001) typedef struct rvcn_dec_message_index_s { - unsigned int message_id; - unsigned int offset; - unsigned int size; - unsigned int filled; + unsigned int message_id; + unsigned int offset; + unsigned int size; + unsigned int filled; } rvcn_dec_message_index_t; typedef struct rvcn_dec_message_header_s { - unsigned int header_size; - unsigned int total_size; - unsigned int num_buffers; - unsigned int msg_type; - unsigned int stream_handle; - unsigned int status_report_feedback_number; - - rvcn_dec_message_index_t index[1]; + unsigned int header_size; + unsigned int total_size; + unsigned int num_buffers; + unsigned int msg_type; + unsigned int stream_handle; + unsigned int status_report_feedback_number; + + rvcn_dec_message_index_t index[1]; } rvcn_dec_message_header_t; typedef struct rvcn_dec_message_create_s { - unsigned int stream_type; - unsigned int session_flags; - unsigned int width_in_samples; - unsigned int height_in_samples; + unsigned int stream_type; + unsigned int session_flags; + unsigned int width_in_samples; + unsigned int height_in_samples; } rvcn_dec_message_create_t; typedef struct rvcn_dec_message_decode_s { - unsigned int stream_type; - unsigned int decode_flags; - unsigned int width_in_samples; - unsigned int height_in_samples; - - unsigned int bsd_size; - unsigned int dpb_size; - unsigned int dt_size; - unsigned int sct_size; - unsigned int sc_coeff_size; - unsigned int hw_ctxt_size; - unsigned int sw_ctxt_size; - unsigned int pic_param_size; - unsigned int mb_cntl_size; - unsigned int reserved0[4]; - unsigned int decode_buffer_flags; - - unsigned int db_pitch; - unsigned int db_aligned_height; - unsigned int db_tiling_mode; - unsigned int db_swizzle_mode; - unsigned int db_array_mode; - unsigned int db_field_mode; - unsigned int db_surf_tile_config; - - unsigned int dt_pitch; - unsigned int dt_uv_pitch; - unsigned int dt_tiling_mode; - unsigned int dt_swizzle_mode; - unsigned int dt_array_mode; - unsigned int dt_field_mode; - unsigned int dt_out_format; - unsigned int dt_surf_tile_config; - unsigned int dt_uv_surf_tile_config; - unsigned int dt_luma_top_offset; - unsigned int dt_luma_bottom_offset; - unsigned int dt_chroma_top_offset; - unsigned int dt_chroma_bottom_offset; - unsigned int dt_chromaV_top_offset; - unsigned int dt_chromaV_bottom_offset; - - unsigned char dpbRefArraySlice[16]; - unsigned char dpbCurArraySlice; - unsigned char dpbReserved[3]; + unsigned int stream_type; + unsigned int decode_flags; + unsigned int width_in_samples; + unsigned int height_in_samples; + + unsigned int bsd_size; + unsigned int dpb_size; + unsigned int dt_size; + unsigned int sct_size; + unsigned int sc_coeff_size; + unsigned int hw_ctxt_size; + unsigned int sw_ctxt_size; + unsigned int pic_param_size; + unsigned int mb_cntl_size; + unsigned int reserved0[4]; + unsigned int decode_buffer_flags; + + unsigned int db_pitch; + unsigned int db_aligned_height; + unsigned int db_tiling_mode; + unsigned int db_swizzle_mode; + unsigned int db_array_mode; + unsigned int db_field_mode; + unsigned int db_surf_tile_config; + + unsigned int dt_pitch; + unsigned int dt_uv_pitch; + unsigned int dt_tiling_mode; + unsigned int dt_swizzle_mode; + unsigned int dt_array_mode; + unsigned int dt_field_mode; + unsigned int dt_out_format; + unsigned int dt_surf_tile_config; + unsigned int dt_uv_surf_tile_config; + unsigned int dt_luma_top_offset; + unsigned int dt_luma_bottom_offset; + unsigned int dt_chroma_top_offset; + unsigned int dt_chroma_bottom_offset; + unsigned int dt_chromaV_top_offset; + unsigned int dt_chromaV_bottom_offset; + + unsigned char dpbRefArraySlice[16]; + unsigned char dpbCurArraySlice; + unsigned char dpbReserved[3]; } rvcn_dec_message_decode_t; typedef struct { - unsigned short viewOrderIndex; - unsigned short viewId; - unsigned short numOfAnchorRefsInL0; - unsigned short viewIdOfAnchorRefsInL0[15]; - unsigned short numOfAnchorRefsInL1; - unsigned short viewIdOfAnchorRefsInL1[15]; - unsigned short numOfNonAnchorRefsInL0; - unsigned short viewIdOfNonAnchorRefsInL0[15]; - unsigned short numOfNonAnchorRefsInL1; - unsigned short viewIdOfNonAnchorRefsInL1[15]; + unsigned short viewOrderIndex; + unsigned short viewId; + unsigned short numOfAnchorRefsInL0; + unsigned short viewIdOfAnchorRefsInL0[15]; + unsigned short numOfAnchorRefsInL1; + unsigned short viewIdOfAnchorRefsInL1[15]; + unsigned short numOfNonAnchorRefsInL0; + unsigned short viewIdOfNonAnchorRefsInL0[15]; + unsigned short numOfNonAnchorRefsInL1; + unsigned short viewIdOfNonAnchorRefsInL1[15]; } radeon_mvcElement_t; typedef struct rvcn_dec_message_avc_s { - unsigned int profile; - unsigned int level; - - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned char chroma_format; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - unsigned char log2_max_frame_num_minus4; - - unsigned char pic_order_cnt_type; - unsigned char log2_max_pic_order_cnt_lsb_minus4; - unsigned char num_ref_frames; - unsigned char reserved_8bit; - - signed char pic_init_qp_minus26; - signed char pic_init_qs_minus26; - signed char chroma_qp_index_offset; - signed char second_chroma_qp_index_offset; - - unsigned char num_slice_groups_minus1; - unsigned char slice_group_map_type; - unsigned char num_ref_idx_l0_active_minus1; - unsigned char num_ref_idx_l1_active_minus1; - - unsigned short slice_group_change_rate_minus1; - unsigned short reserved_16bit_1; - - unsigned char scaling_list_4x4[6][16]; - unsigned char scaling_list_8x8[2][64]; - - unsigned int frame_num; - unsigned int frame_num_list[16]; - int curr_field_order_cnt_list[2]; - int field_order_cnt_list[16][2]; - - unsigned int decoded_pic_idx; - unsigned int curr_pic_ref_frame_num; - unsigned char ref_frame_list[16]; - - unsigned int reserved[122]; - - struct { - unsigned int numViews; - unsigned int viewId0; - radeon_mvcElement_t mvcElements[1]; - } mvc; + unsigned int profile; + unsigned int level; + + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned char chroma_format; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_frame_num_minus4; + + unsigned char pic_order_cnt_type; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + unsigned char num_ref_frames; + unsigned char reserved_8bit; + + signed char pic_init_qp_minus26; + signed char pic_init_qs_minus26; + signed char chroma_qp_index_offset; + signed char second_chroma_qp_index_offset; + + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + unsigned char num_ref_idx_l0_active_minus1; + unsigned char num_ref_idx_l1_active_minus1; + + unsigned short slice_group_change_rate_minus1; + unsigned short reserved_16bit_1; + + unsigned char scaling_list_4x4[6][16]; + unsigned char scaling_list_8x8[2][64]; + + unsigned int frame_num; + unsigned int frame_num_list[16]; + int curr_field_order_cnt_list[2]; + int field_order_cnt_list[16][2]; + + unsigned int decoded_pic_idx; + unsigned int curr_pic_ref_frame_num; + unsigned char ref_frame_list[16]; + + unsigned int reserved[122]; + + struct { + unsigned int numViews; + unsigned int viewId0; + radeon_mvcElement_t mvcElements[1]; + } mvc; } rvcn_dec_message_avc_t; typedef struct rvcn_dec_message_vc1_s { - unsigned int profile; - unsigned int level; - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned int pic_structure; - unsigned int chroma_format; - unsigned short decoded_pic_idx; - unsigned short deblocked_pic_idx; - unsigned short forward_ref_idx; - unsigned short backward_ref_idx; - unsigned int cached_frame_flag; + unsigned int profile; + unsigned int level; + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned int pic_structure; + unsigned int chroma_format; + unsigned short decoded_pic_idx; + unsigned short deblocked_pic_idx; + unsigned short forward_ref_idx; + unsigned short backward_ref_idx; + unsigned int cached_frame_flag; } rvcn_dec_message_vc1_t; typedef struct rvcn_dec_message_mpeg2_vld_s { - unsigned int decoded_pic_idx; - unsigned int forward_ref_pic_idx; - unsigned int backward_ref_pic_idx; - - unsigned char load_intra_quantiser_matrix; - unsigned char load_nonintra_quantiser_matrix; - unsigned char reserved_quantiser_alignement[2]; - unsigned char intra_quantiser_matrix[64]; - unsigned char nonintra_quantiser_matrix[64]; - - unsigned char profile_and_level_indication; - unsigned char chroma_format; - - unsigned char picture_coding_type; - - unsigned char reserved_1; - - unsigned char f_code[2][2]; - unsigned char intra_dc_precision; - unsigned char pic_structure; - unsigned char top_field_first; - unsigned char frame_pred_frame_dct; - unsigned char concealment_motion_vectors; - unsigned char q_scale_type; - unsigned char intra_vlc_format; - unsigned char alternate_scan; + unsigned int decoded_pic_idx; + unsigned int forward_ref_pic_idx; + unsigned int backward_ref_pic_idx; + + unsigned char load_intra_quantiser_matrix; + unsigned char load_nonintra_quantiser_matrix; + unsigned char reserved_quantiser_alignement[2]; + unsigned char intra_quantiser_matrix[64]; + unsigned char nonintra_quantiser_matrix[64]; + + unsigned char profile_and_level_indication; + unsigned char chroma_format; + + unsigned char picture_coding_type; + + unsigned char reserved_1; + + unsigned char f_code[2][2]; + unsigned char intra_dc_precision; + unsigned char pic_structure; + unsigned char top_field_first; + unsigned char frame_pred_frame_dct; + unsigned char concealment_motion_vectors; + unsigned char q_scale_type; + unsigned char intra_vlc_format; + unsigned char alternate_scan; } rvcn_dec_message_mpeg2_vld_t; typedef struct rvcn_dec_message_mpeg4_asp_vld_s { - unsigned int decoded_pic_idx; - unsigned int forward_ref_pic_idx; - unsigned int backward_ref_pic_idx; - - unsigned int variant_type; - unsigned char profile_and_level_indication; - - unsigned char video_object_layer_verid; - unsigned char video_object_layer_shape; - - unsigned char reserved_1; - - unsigned short video_object_layer_width; - unsigned short video_object_layer_height; - - unsigned short vop_time_increment_resolution; - - unsigned short reserved_2; - - struct { - unsigned int short_video_header :1; - unsigned int obmc_disable :1; - unsigned int interlaced :1; - unsigned int load_intra_quant_mat :1; - unsigned int load_nonintra_quant_mat :1; - unsigned int quarter_sample :1; - unsigned int complexity_estimation_disable :1; - unsigned int resync_marker_disable :1; - unsigned int data_partitioned :1; - unsigned int reversible_vlc :1; - unsigned int newpred_enable :1; - unsigned int reduced_resolution_vop_enable :1; - unsigned int scalability :1; - unsigned int is_object_layer_identifier :1; - unsigned int fixed_vop_rate :1; - unsigned int newpred_segment_type :1; - unsigned int reserved_bits :16; - }; - - unsigned char quant_type; - unsigned char reserved_3[3]; - unsigned char intra_quant_mat[64]; - unsigned char nonintra_quant_mat[64]; - - struct { - unsigned char sprite_enable; - - unsigned char reserved_4[3]; - - unsigned short sprite_width; - unsigned short sprite_height; - short sprite_left_coordinate; - short sprite_top_coordinate; - - unsigned char no_of_sprite_warping_points; - unsigned char sprite_warping_accuracy; - unsigned char sprite_brightness_change; - unsigned char low_latency_sprite_enable; - } sprite_config; - - struct { - struct { - unsigned int check_skip :1; - unsigned int switch_rounding :1; - unsigned int t311 :1; - unsigned int reserved_bits :29; - }; - - unsigned char vol_mode; - - unsigned char reserved_5[3]; - } divx_311_config; - - struct { - unsigned char vop_data_present; - unsigned char vop_coding_type; - unsigned char vop_quant; - unsigned char vop_coded; - unsigned char vop_rounding_type; - unsigned char intra_dc_vlc_thr; - unsigned char top_field_first; - unsigned char alternate_vertical_scan_flag; - unsigned char vop_fcode_forward; - unsigned char vop_fcode_backward; - unsigned int TRB[2]; - unsigned int TRD[2]; - } vop; + unsigned int decoded_pic_idx; + unsigned int forward_ref_pic_idx; + unsigned int backward_ref_pic_idx; + + unsigned int variant_type; + unsigned char profile_and_level_indication; + + unsigned char video_object_layer_verid; + unsigned char video_object_layer_shape; + + unsigned char reserved_1; + + unsigned short video_object_layer_width; + unsigned short video_object_layer_height; + + unsigned short vop_time_increment_resolution; + + unsigned short reserved_2; + + struct { + unsigned int short_video_header : 1; + unsigned int obmc_disable : 1; + unsigned int interlaced : 1; + unsigned int load_intra_quant_mat : 1; + unsigned int load_nonintra_quant_mat : 1; + unsigned int quarter_sample : 1; + unsigned int complexity_estimation_disable : 1; + unsigned int resync_marker_disable : 1; + unsigned int data_partitioned : 1; + unsigned int reversible_vlc : 1; + unsigned int newpred_enable : 1; + unsigned int reduced_resolution_vop_enable : 1; + unsigned int scalability : 1; + unsigned int is_object_layer_identifier : 1; + unsigned int fixed_vop_rate : 1; + unsigned int newpred_segment_type : 1; + unsigned int reserved_bits : 16; + }; + + unsigned char quant_type; + unsigned char reserved_3[3]; + unsigned char intra_quant_mat[64]; + unsigned char nonintra_quant_mat[64]; + + struct { + unsigned char sprite_enable; + + unsigned char reserved_4[3]; + + unsigned short sprite_width; + unsigned short sprite_height; + short sprite_left_coordinate; + short sprite_top_coordinate; + + unsigned char no_of_sprite_warping_points; + unsigned char sprite_warping_accuracy; + unsigned char sprite_brightness_change; + unsigned char low_latency_sprite_enable; + } sprite_config; + + struct { + struct { + unsigned int check_skip : 1; + unsigned int switch_rounding : 1; + unsigned int t311 : 1; + unsigned int reserved_bits : 29; + }; + + unsigned char vol_mode; + + unsigned char reserved_5[3]; + } divx_311_config; + + struct { + unsigned char vop_data_present; + unsigned char vop_coding_type; + unsigned char vop_quant; + unsigned char vop_coded; + unsigned char vop_rounding_type; + unsigned char intra_dc_vlc_thr; + unsigned char top_field_first; + unsigned char alternate_vertical_scan_flag; + unsigned char vop_fcode_forward; + unsigned char vop_fcode_backward; + unsigned int TRB[2]; + unsigned int TRD[2]; + } vop; } rvcn_dec_message_mpeg4_asp_vld_t; typedef struct rvcn_dec_message_hevc_s { - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned char chroma_format; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - unsigned char log2_max_pic_order_cnt_lsb_minus4; - - unsigned char sps_max_dec_pic_buffering_minus1; - unsigned char log2_min_luma_coding_block_size_minus3; - unsigned char log2_diff_max_min_luma_coding_block_size; - unsigned char log2_min_transform_block_size_minus2; - - unsigned char log2_diff_max_min_transform_block_size; - unsigned char max_transform_hierarchy_depth_inter; - unsigned char max_transform_hierarchy_depth_intra; - unsigned char pcm_sample_bit_depth_luma_minus1; - - unsigned char pcm_sample_bit_depth_chroma_minus1; - unsigned char log2_min_pcm_luma_coding_block_size_minus3; - unsigned char log2_diff_max_min_pcm_luma_coding_block_size; - unsigned char num_extra_slice_header_bits; - - unsigned char num_short_term_ref_pic_sets; - unsigned char num_long_term_ref_pic_sps; - unsigned char num_ref_idx_l0_default_active_minus1; - unsigned char num_ref_idx_l1_default_active_minus1; - - signed char pps_cb_qp_offset; - signed char pps_cr_qp_offset; - signed char pps_beta_offset_div2; - signed char pps_tc_offset_div2; - - unsigned char diff_cu_qp_delta_depth; - unsigned char num_tile_columns_minus1; - unsigned char num_tile_rows_minus1; - unsigned char log2_parallel_merge_level_minus2; - - unsigned short column_width_minus1[19]; - unsigned short row_height_minus1[21]; - - signed char init_qp_minus26; - unsigned char num_delta_pocs_ref_rps_idx; - unsigned char curr_idx; - unsigned char reserved[1]; - int curr_poc; - unsigned char ref_pic_list[16]; - int poc_list[16]; - unsigned char ref_pic_set_st_curr_before[8]; - unsigned char ref_pic_set_st_curr_after[8]; - unsigned char ref_pic_set_lt_curr[8]; - - unsigned char ucScalingListDCCoefSizeID2[6]; - unsigned char ucScalingListDCCoefSizeID3[2]; - - unsigned char highestTid; - unsigned char isNonRef; - - unsigned char p010_mode; - unsigned char msb_mode; - unsigned char luma_10to8; - unsigned char chroma_10to8; - - unsigned char hevc_reserved[2]; - - unsigned char direct_reflist[2][15]; + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned char chroma_format; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + + unsigned char sps_max_dec_pic_buffering_minus1; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_transform_block_size_minus2; + + unsigned char log2_diff_max_min_transform_block_size; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char pcm_sample_bit_depth_luma_minus1; + + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char num_extra_slice_header_bits; + + unsigned char num_short_term_ref_pic_sets; + unsigned char num_long_term_ref_pic_sps; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + + signed char pps_cb_qp_offset; + signed char pps_cr_qp_offset; + signed char pps_beta_offset_div2; + signed char pps_tc_offset_div2; + + unsigned char diff_cu_qp_delta_depth; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + unsigned char log2_parallel_merge_level_minus2; + + unsigned short column_width_minus1[19]; + unsigned short row_height_minus1[21]; + + signed char init_qp_minus26; + unsigned char num_delta_pocs_ref_rps_idx; + unsigned char curr_idx; + unsigned char reserved[1]; + int curr_poc; + unsigned char ref_pic_list[16]; + int poc_list[16]; + unsigned char ref_pic_set_st_curr_before[8]; + unsigned char ref_pic_set_st_curr_after[8]; + unsigned char ref_pic_set_lt_curr[8]; + + unsigned char ucScalingListDCCoefSizeID2[6]; + unsigned char ucScalingListDCCoefSizeID3[2]; + + unsigned char highestTid; + unsigned char isNonRef; + + unsigned char p010_mode; + unsigned char msb_mode; + unsigned char luma_10to8; + unsigned char chroma_10to8; + + unsigned char hevc_reserved[2]; + + unsigned char direct_reflist[2][15]; } rvcn_dec_message_hevc_t; typedef struct rvcn_dec_message_vp9_s { - unsigned int frame_header_flags; - - unsigned char frame_context_idx; - unsigned char reset_frame_context; - - unsigned char curr_pic_idx; - unsigned char interp_filter; - - unsigned char filter_level; - unsigned char sharpness_level; - unsigned char lf_adj_level[8][4][2]; - unsigned char base_qindex; - signed char y_dc_delta_q; - signed char uv_ac_delta_q; - signed char uv_dc_delta_q; - - unsigned char log2_tile_cols; - unsigned char log2_tile_rows; - unsigned char tx_mode; - unsigned char reference_mode; - unsigned char chroma_format; - - unsigned char ref_frame_map[8]; - - unsigned char frame_refs[3]; - unsigned char ref_frame_sign_bias[3]; - unsigned char frame_to_show; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - - unsigned char p010_mode; - unsigned char msb_mode; - unsigned char luma_10to8; - unsigned char chroma_10to8; - - unsigned int vp9_frame_size; - unsigned int compressed_header_size; - unsigned int uncompressed_header_size; + unsigned int frame_header_flags; + + unsigned char frame_context_idx; + unsigned char reset_frame_context; + + unsigned char curr_pic_idx; + unsigned char interp_filter; + + unsigned char filter_level; + unsigned char sharpness_level; + unsigned char lf_adj_level[8][4][2]; + unsigned char base_qindex; + signed char y_dc_delta_q; + signed char uv_ac_delta_q; + signed char uv_dc_delta_q; + + unsigned char log2_tile_cols; + unsigned char log2_tile_rows; + unsigned char tx_mode; + unsigned char reference_mode; + unsigned char chroma_format; + + unsigned char ref_frame_map[8]; + + unsigned char frame_refs[3]; + unsigned char ref_frame_sign_bias[3]; + unsigned char frame_to_show; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + + unsigned char p010_mode; + unsigned char msb_mode; + unsigned char luma_10to8; + unsigned char chroma_10to8; + + unsigned int vp9_frame_size; + unsigned int compressed_header_size; + unsigned int uncompressed_header_size; } rvcn_dec_message_vp9_t; typedef struct rvcn_dec_feature_index_s { - unsigned int feature_id; - unsigned int offset; - unsigned int size; - unsigned int filled; + unsigned int feature_id; + unsigned int offset; + unsigned int size; + unsigned int filled; } rvcn_dec_feature_index_t; typedef struct rvcn_dec_feedback_header_s { - unsigned int header_size; - unsigned int total_size; - unsigned int num_buffers; - unsigned int status_report_feedback_number; - unsigned int status; - unsigned int value; - unsigned int errorBits; - rvcn_dec_feature_index_t index[1]; + unsigned int header_size; + unsigned int total_size; + unsigned int num_buffers; + unsigned int status_report_feedback_number; + unsigned int status; + unsigned int value; + unsigned int errorBits; + rvcn_dec_feature_index_t index[1]; } rvcn_dec_feedback_header_t; typedef struct rvcn_dec_feedback_profiling_s { - unsigned int size; - - unsigned int decodingTime; - unsigned int decodePlusOverhead; - unsigned int masterTimerHits; - unsigned int uvdLBSIREWaitCount; - - unsigned int avgMPCMemLatency; - unsigned int maxMPCMemLatency; - unsigned int uvdMPCLumaHits; - unsigned int uvdMPCLumaHitPend; - unsigned int uvdMPCLumaSearch; - unsigned int uvdMPCChromaHits; - unsigned int uvdMPCChromaHitPend; - unsigned int uvdMPCChromaSearch; - - unsigned int uvdLMIPerfCountLo; - unsigned int uvdLMIPerfCountHi; - unsigned int uvdLMIAvgLatCntrEnvHit; - unsigned int uvdLMILatCntr; - - unsigned int frameCRC0; - unsigned int frameCRC1; - unsigned int frameCRC2; - unsigned int frameCRC3; - - unsigned int uvdLMIPerfMonCtrl; - unsigned int uvdLMILatCtrl; - unsigned int uvdMPCCntl; - unsigned int reserved0[4]; - unsigned int decoderID; - unsigned int codec; - - unsigned int dmaHwCrc32Enable; - unsigned int dmaHwCrc32Value; - unsigned int dmaHwCrc32Value2; + unsigned int size; + + unsigned int decodingTime; + unsigned int decodePlusOverhead; + unsigned int masterTimerHits; + unsigned int uvdLBSIREWaitCount; + + unsigned int avgMPCMemLatency; + unsigned int maxMPCMemLatency; + unsigned int uvdMPCLumaHits; + unsigned int uvdMPCLumaHitPend; + unsigned int uvdMPCLumaSearch; + unsigned int uvdMPCChromaHits; + unsigned int uvdMPCChromaHitPend; + unsigned int uvdMPCChromaSearch; + + unsigned int uvdLMIPerfCountLo; + unsigned int uvdLMIPerfCountHi; + unsigned int uvdLMIAvgLatCntrEnvHit; + unsigned int uvdLMILatCntr; + + unsigned int frameCRC0; + unsigned int frameCRC1; + unsigned int frameCRC2; + unsigned int frameCRC3; + + unsigned int uvdLMIPerfMonCtrl; + unsigned int uvdLMILatCtrl; + unsigned int uvdMPCCntl; + unsigned int reserved0[4]; + unsigned int decoderID; + unsigned int codec; + + unsigned int dmaHwCrc32Enable; + unsigned int dmaHwCrc32Value; + unsigned int dmaHwCrc32Value2; } rvcn_dec_feedback_profiling_t; typedef struct rvcn_dec_vp9_nmv_ctx_mask_s { - unsigned short classes_mask[2]; - unsigned short bits_mask[2]; - unsigned char joints_mask; - unsigned char sign_mask[2]; - unsigned char class0_mask[2]; - unsigned char class0_fp_mask[2]; - unsigned char fp_mask[2]; - unsigned char class0_hp_mask[2]; - unsigned char hp_mask[2]; - unsigned char reserve[11]; + unsigned short classes_mask[2]; + unsigned short bits_mask[2]; + unsigned char joints_mask; + unsigned char sign_mask[2]; + unsigned char class0_mask[2]; + unsigned char class0_fp_mask[2]; + unsigned char fp_mask[2]; + unsigned char class0_hp_mask[2]; + unsigned char hp_mask[2]; + unsigned char reserve[11]; } rvcn_dec_vp9_nmv_ctx_mask_t; -typedef struct rvcn_dec_vp9_nmv_component_s{ - unsigned char sign; - unsigned char classes[10]; - unsigned char class0[1]; - unsigned char bits[10]; - unsigned char class0_fp[2][3]; - unsigned char fp[3]; - unsigned char class0_hp; - unsigned char hp; +typedef struct rvcn_dec_vp9_nmv_component_s { + unsigned char sign; + unsigned char classes[10]; + unsigned char class0[1]; + unsigned char bits[10]; + unsigned char class0_fp[2][3]; + unsigned char fp[3]; + unsigned char class0_hp; + unsigned char hp; } rvcn_dec_vp9_nmv_component_t; typedef struct rvcn_dec_vp9_probs_s { - rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask; - unsigned char coef_probs[4][2][2][6][6][3]; - unsigned char y_mode_prob[4][9]; - unsigned char uv_mode_prob[10][9]; - unsigned char single_ref_prob[5][2]; - unsigned char switchable_interp_prob[4][2]; - unsigned char partition_prob[16][3]; - unsigned char inter_mode_probs[7][3]; - unsigned char mbskip_probs[3]; - unsigned char intra_inter_prob[4]; - unsigned char comp_inter_prob[5]; - unsigned char comp_ref_prob[5]; - unsigned char tx_probs_32x32[2][3]; - unsigned char tx_probs_16x16[2][2]; - unsigned char tx_probs_8x8[2][1]; - unsigned char mv_joints[3]; - rvcn_dec_vp9_nmv_component_t mv_comps[2]; + rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask; + unsigned char coef_probs[4][2][2][6][6][3]; + unsigned char y_mode_prob[4][9]; + unsigned char uv_mode_prob[10][9]; + unsigned char single_ref_prob[5][2]; + unsigned char switchable_interp_prob[4][2]; + unsigned char partition_prob[16][3]; + unsigned char inter_mode_probs[7][3]; + unsigned char mbskip_probs[3]; + unsigned char intra_inter_prob[4]; + unsigned char comp_inter_prob[5]; + unsigned char comp_ref_prob[5]; + unsigned char tx_probs_32x32[2][3]; + unsigned char tx_probs_16x16[2][2]; + unsigned char tx_probs_8x8[2][1]; + unsigned char mv_joints[3]; + rvcn_dec_vp9_nmv_component_t mv_comps[2]; } rvcn_dec_vp9_probs_t; typedef struct rvcn_dec_vp9_probs_segment_s { - union { - rvcn_dec_vp9_probs_t probs; - unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; - }; - - union { - struct { - unsigned int feature_data[8]; - unsigned char tree_probs[7]; - unsigned char pred_probs[3]; - unsigned char abs_delta; - unsigned char feature_mask[8]; - } seg; - unsigned char segment_data[256]; - }; + union { + rvcn_dec_vp9_probs_t probs; + unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; + }; + + union { + struct { + unsigned int feature_data[8]; + unsigned char tree_probs[7]; + unsigned char pred_probs[3]; + unsigned char abs_delta; + unsigned char feature_mask[8]; + } seg; + unsigned char segment_data[256]; + }; } rvcn_dec_vp9_probs_segment_t; struct jpeg_params { - unsigned bsd_size; - unsigned dt_pitch; - unsigned dt_uv_pitch; - unsigned dt_luma_top_offset; - unsigned dt_chroma_top_offset; - bool direct_reg; + unsigned bsd_size; + unsigned dt_pitch; + unsigned dt_uv_pitch; + unsigned dt_luma_top_offset; + unsigned dt_chroma_top_offset; + bool direct_reg; }; struct radeon_decoder { - struct pipe_video_codec base; - - unsigned stream_handle; - unsigned stream_type; - unsigned frame_number; - - struct pipe_screen *screen; - struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; - - void *msg; - uint32_t *fb; - uint8_t *it; - uint8_t *probs; - void *bs_ptr; - - struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; - struct rvid_buffer bs_buffers[NUM_BUFFERS]; - struct rvid_buffer dpb; - struct rvid_buffer ctx; - struct rvid_buffer sessionctx; - - unsigned bs_size; - unsigned cur_buffer; - void *render_pic_list[16]; - bool show_frame; - unsigned ref_idx; - struct { - unsigned data0; - unsigned data1; - unsigned cmd; - unsigned cntl; - } reg; - struct jpeg_params jpg; - void (*send_cmd)(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); + struct pipe_video_codec base; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + void *msg; + uint32_t *fb; + uint8_t *it; + uint8_t *probs; + void *bs_ptr; + + struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + struct rvid_buffer dpb; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + + unsigned bs_size; + unsigned cur_buffer; + void *render_pic_list[16]; + bool show_frame; + unsigned ref_idx; + struct { + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned cntl; + } reg; + struct jpeg_params jpg; + void (*send_cmd)(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); }; -void send_cmd_dec(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); +void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); -void send_cmd_jpeg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); +void send_cmd_jpeg(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templat); + const struct pipe_video_codec *templat); #endif diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c b/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c index 1214c9438d1..e153436f1d7 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec_jpeg.c @@ -25,288 +25,277 @@ * **************************************************************************/ -#include <assert.h> -#include <stdio.h> - #include "pipe/p_video_codec.h" - +#include "radeon_vcn_dec.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vcn_dec.h" +#include <assert.h> +#include <stdio.h> static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct si_texture *luma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[0]; - struct si_texture *chroma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[1]; - - dec->jpg.bsd_size = align(dec->bs_size, 128); - dec->jpg.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; - if (target->buffer_format == PIPE_FORMAT_NV12) - dec->jpg.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; - dec->jpg.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; - dec->jpg.dt_uv_pitch = dec->jpg.dt_pitch / 2; - - return luma->buffer.buf; + struct si_texture *luma = (struct si_texture *)((struct vl_video_buffer *)target)->resources[0]; + struct si_texture *chroma = + (struct si_texture *)((struct vl_video_buffer *)target)->resources[1]; + + dec->jpg.bsd_size = align(dec->bs_size, 128); + dec->jpg.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; + if (target->buffer_format == PIPE_FORMAT_NV12) + dec->jpg.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; + dec->jpg.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; + dec->jpg.dt_uv_pitch = dec->jpg.dt_pitch / 2; + + return luma->buffer.buf; } /* add a new set register command to the IB */ -static void set_reg_jpeg(struct radeon_decoder *dec, unsigned reg, - unsigned cond, unsigned type, uint32_t val) +static void set_reg_jpeg(struct radeon_decoder *dec, unsigned reg, unsigned cond, unsigned type, + uint32_t val) { - radeon_emit(dec->cs, RDECODE_PKTJ(reg, cond, type)); - radeon_emit(dec->cs, val); + radeon_emit(dec->cs, RDECODE_PKTJ(reg, cond, type)); + radeon_emit(dec->cs, val); } /* send a bitstream buffer command */ -static void send_cmd_bitstream(struct radeon_decoder *dec, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd_bitstream(struct radeon_decoder *dec, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - uint64_t addr; + uint64_t addr; - // jpeg soft reset - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); + // jpeg soft reset + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); - // ensuring the Reset is asserted in SCLK domain - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); + // ensuring the Reset is asserted in SCLK domain + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); - // wait mem - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); + // wait mem + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); - // ensuring the Reset is de-asserted in SCLK domain - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); + // ensuring the Reset is de-asserted in SCLK domain + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; - // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH), COND0, TYPE0, (addr >> 32)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW), COND0, TYPE0, addr); + // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH), COND0, TYPE0, + (addr >> 32)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW), COND0, TYPE0, addr); - // set jpeg_rb_base - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_BASE), COND0, TYPE0, 0); + // set jpeg_rb_base + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_BASE), COND0, TYPE0, 0); - // set jpeg_rb_base - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_SIZE), COND0, TYPE0, 0xFFFFFFF0); + // set jpeg_rb_base + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_SIZE), COND0, TYPE0, 0xFFFFFFF0); - // set jpeg_rb_wptr - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_WPTR), COND0, TYPE0, (dec->jpg.bsd_size >> 2)); + // set jpeg_rb_wptr + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_WPTR), COND0, TYPE0, (dec->jpg.bsd_size >> 2)); } /* send a target buffer command */ -static void send_cmd_target(struct radeon_decoder *dec, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd_target(struct radeon_decoder *dec, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - uint64_t addr; - - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_PITCH), COND0, TYPE0, (dec->jpg.dt_pitch >> 4)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_PITCH), COND0, TYPE0, ((dec->jpg.dt_uv_pitch * 2) >> 4)); - - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0); - - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; - - // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH), COND0, TYPE0, (addr >> 32)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW), COND0, TYPE0, addr); - - // set output buffer data address - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 0); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_luma_top_offset); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 1); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_chroma_top_offset); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TIER_CNTL2), COND0, TYPE3, 0); - - // set output buffer read pointer - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_RPTR), COND0, TYPE0, 0); - - // enable error interrupts - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INT_EN), COND0, TYPE0, 0xFFFFFFFE); - - // start engine command - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x6); - - // wait for job completion, wait for job JBSI fetch done - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (dec->jpg.bsd_size >> 2)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_RPTR), COND0, TYPE3, 0xFFFFFFFF); - - // wait for job jpeg outbuf idle - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0xFFFFFFFF); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_WPTR), COND0, TYPE3, 0x00000001); - - // stop engine - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x4); - - // asserting jpeg lmi drop - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 23 | 1 << 0)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE1, 0); - - // asserting jpeg reset - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); - - // ensure reset is asserted in sclk domain - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); - - // de-assert jpeg reset - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); - - // ensure reset is de-asserted in sclk domain - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); - - // de-asserting jpeg lmi drop - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); - set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0); + uint64_t addr; + + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_PITCH), COND0, TYPE0, (dec->jpg.dt_pitch >> 4)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_PITCH), COND0, TYPE0, + ((dec->jpg.dt_uv_pitch * 2) >> 4)); + + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0); + + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; + + // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH), COND0, TYPE0, + (addr >> 32)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW), COND0, TYPE0, addr); + + // set output buffer data address + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 0); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_luma_top_offset); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 1); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, dec->jpg.dt_chroma_top_offset); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TIER_CNTL2), COND0, TYPE3, 0); + + // set output buffer read pointer + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_RPTR), COND0, TYPE0, 0); + + // enable error interrupts + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_INT_EN), COND0, TYPE0, 0xFFFFFFFE); + + // start engine command + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x6); + + // wait for job completion, wait for job JBSI fetch done + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (dec->jpg.bsd_size >> 2)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_RB_RPTR), COND0, TYPE3, 0xFFFFFFFF); + + // wait for job jpeg outbuf idle + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0xFFFFFFFF); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_WPTR), COND0, TYPE3, 0x00000001); + + // stop engine + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x4); + + // asserting jpeg lmi drop + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 23 | 1 << 0)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE1, 0); + + // asserting jpeg reset + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); + + // ensure reset is asserted in sclk domain + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); + + // de-assert jpeg reset + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); + + // ensure reset is de-asserted in sclk domain + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); + + // de-asserting jpeg lmi drop + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); + set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0); } /* send a bitstream buffer command */ -static void send_cmd_bitstream_direct(struct radeon_decoder *dec, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd_bitstream_direct(struct radeon_decoder *dec, struct pb_buffer *buf, + uint32_t off, enum radeon_bo_usage usage, + enum radeon_bo_domain domain) { - uint64_t addr; + uint64_t addr; - // jpeg soft reset - set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 1); + // jpeg soft reset + set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 1); - // ensuring the Reset is asserted in SCLK domain - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0x1 << 0x10)); - set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); + // ensuring the Reset is asserted in SCLK domain + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0x1 << 0x10)); + set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); - // wait mem - set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 0); + // wait mem + set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 0); - // ensuring the Reset is de-asserted in SCLK domain - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10)); - set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); + // ensuring the Reset is de-asserted in SCLK domain + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10)); + set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; - // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address - set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32)); - set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW, COND0, TYPE0, addr); + // set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address + set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32)); + set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW, COND0, TYPE0, addr); - // set jpeg_rb_base - set_reg_jpeg(dec, vcnipUVD_JPEG_RB_BASE, COND0, TYPE0, 0); + // set jpeg_rb_base + set_reg_jpeg(dec, vcnipUVD_JPEG_RB_BASE, COND0, TYPE0, 0); - // set jpeg_rb_base - set_reg_jpeg(dec, vcnipUVD_JPEG_RB_SIZE, COND0, TYPE0, 0xFFFFFFF0); + // set jpeg_rb_base + set_reg_jpeg(dec, vcnipUVD_JPEG_RB_SIZE, COND0, TYPE0, 0xFFFFFFF0); - // set jpeg_rb_wptr - set_reg_jpeg(dec, vcnipUVD_JPEG_RB_WPTR, COND0, TYPE0, (dec->jpg.bsd_size >> 2)); + // set jpeg_rb_wptr + set_reg_jpeg(dec, vcnipUVD_JPEG_RB_WPTR, COND0, TYPE0, (dec->jpg.bsd_size >> 2)); } /* send a target buffer command */ -static void send_cmd_target_direct(struct radeon_decoder *dec, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd_target_direct(struct radeon_decoder *dec, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - uint64_t addr; + uint64_t addr; - set_reg_jpeg(dec, vcnipUVD_JPEG_PITCH, COND0, TYPE0, (dec->jpg.dt_pitch >> 4)); - set_reg_jpeg(dec, vcnipUVD_JPEG_UV_PITCH, COND0, TYPE0, ((dec->jpg.dt_uv_pitch * 2) >> 4)); + set_reg_jpeg(dec, vcnipUVD_JPEG_PITCH, COND0, TYPE0, (dec->jpg.dt_pitch >> 4)); + set_reg_jpeg(dec, vcnipUVD_JPEG_UV_PITCH, COND0, TYPE0, ((dec->jpg.dt_uv_pitch * 2) >> 4)); - set_reg_jpeg(dec, vcnipJPEG_DEC_ADDR_MODE, COND0, TYPE0, 0); - set_reg_jpeg(dec, vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0); - set_reg_jpeg(dec, vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0); + set_reg_jpeg(dec, vcnipJPEG_DEC_ADDR_MODE, COND0, TYPE0, 0); + set_reg_jpeg(dec, vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0); + set_reg_jpeg(dec, vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0); - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; - // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address - set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32)); - set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW, COND0, TYPE0, addr); + // set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address + set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH, COND0, TYPE0, (addr >> 32)); + set_reg_jpeg(dec, vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW, COND0, TYPE0, addr); - // set output buffer data address - set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 0); - set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_luma_top_offset); - set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 1); - set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_chroma_top_offset); - set_reg_jpeg(dec, vcnipUVD_JPEG_TIER_CNTL2, COND0, 0, 0); + // set output buffer data address + set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 0); + set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_luma_top_offset); + set_reg_jpeg(dec, vcnipUVD_JPEG_INDEX, COND0, TYPE0, 1); + set_reg_jpeg(dec, vcnipUVD_JPEG_DATA, COND0, TYPE0, dec->jpg.dt_chroma_top_offset); + set_reg_jpeg(dec, vcnipUVD_JPEG_TIER_CNTL2, COND0, 0, 0); - // set output buffer read pointer - set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_RPTR, COND0, TYPE0, 0); - set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_CNTL, COND0, TYPE0, ((0x00001587 & (~0x00000180L)) | (0x1 << 0x7) | (0x1 << 0x6))); + // set output buffer read pointer + set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_RPTR, COND0, TYPE0, 0); + set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_CNTL, COND0, TYPE0, + ((0x00001587 & (~0x00000180L)) | (0x1 << 0x7) | (0x1 << 0x6))); - // enable error interrupts - set_reg_jpeg(dec, vcnipUVD_JPEG_INT_EN, COND0, TYPE0, 0xFFFFFFFE); + // enable error interrupts + set_reg_jpeg(dec, vcnipUVD_JPEG_INT_EN, COND0, TYPE0, 0xFFFFFFFE); - // start engine command - set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0xE); + // start engine command + set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0xE); - // wait for job completion, wait for job JBSI fetch done - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (dec->jpg.bsd_size >> 2)); - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); - set_reg_jpeg(dec, vcnipUVD_JPEG_RB_RPTR, COND3, TYPE3, 0xFFFFFFFF); + // wait for job completion, wait for job JBSI fetch done + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (dec->jpg.bsd_size >> 2)); + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); + set_reg_jpeg(dec, vcnipUVD_JPEG_RB_RPTR, COND3, TYPE3, 0xFFFFFFFF); - // wait for job jpeg outbuf idle - set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, 0xFFFFFFFF); - set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_WPTR, COND3, TYPE3, 0x00000001); + // wait for job jpeg outbuf idle + set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, 0xFFFFFFFF); + set_reg_jpeg(dec, vcnipUVD_JPEG_OUTBUF_WPTR, COND3, TYPE3, 0x00000001); - // stop engine - set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0x4); + // stop engine + set_reg_jpeg(dec, vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0x4); } /** * send cmd for vcn jpeg */ -void send_cmd_jpeg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +void send_cmd_jpeg(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct pb_buffer *dt; - struct rvid_buffer *bs_buf; - - bs_buf = &dec->bs_buffers[dec->cur_buffer]; - - memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->res->buf); - - dt = radeon_jpeg_get_decode_param(dec, target, picture); - - if (dec->jpg.direct_reg == true) { - send_cmd_bitstream_direct(dec, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd_target_direct(dec, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - } else { - send_cmd_bitstream(dec, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd_target(dec, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - } + struct pb_buffer *dt; + struct rvid_buffer *bs_buf; + + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + dt = radeon_jpeg_get_decode_param(dec, target, picture); + + if (dec->jpg.direct_reg == true) { + send_cmd_bitstream_direct(dec, bs_buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + send_cmd_target_direct(dec, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + } else { + send_cmd_bitstream(dec, bs_buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + send_cmd_target(dec, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + } } diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index 9643c0817ab..8635f9d52d0 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -25,18 +25,16 @@ * **************************************************************************/ -#include <stdio.h> +#include "radeon_vcn_enc.h" #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vcn_enc.h" +#include <stdio.h> static const unsigned index_to_shifts[4] = {24, 16, 8, 0}; @@ -53,15 +51,15 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.not_referenced = pic->not_referenced; enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); if (pic->pic_ctrl.enc_frame_cropping_flag) { - enc->enc_pic.crop_left = pic->pic_ctrl.enc_frame_crop_left_offset; - enc->enc_pic.crop_right = pic->pic_ctrl.enc_frame_crop_right_offset; - enc->enc_pic.crop_top = pic->pic_ctrl.enc_frame_crop_top_offset; - enc->enc_pic.crop_bottom = pic->pic_ctrl.enc_frame_crop_bottom_offset; + enc->enc_pic.crop_left = pic->pic_ctrl.enc_frame_crop_left_offset; + enc->enc_pic.crop_right = pic->pic_ctrl.enc_frame_crop_right_offset; + enc->enc_pic.crop_top = pic->pic_ctrl.enc_frame_crop_top_offset; + enc->enc_pic.crop_bottom = pic->pic_ctrl.enc_frame_crop_bottom_offset; } else { - enc->enc_pic.crop_left = 0; - enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; - enc->enc_pic.crop_top = 0; - enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; + enc->enc_pic.crop_left = 0; + enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) / 2; + enc->enc_pic.crop_top = 0; + enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2; } enc->enc_pic.rc_layer_init.target_bit_rate = pic->rate_ctrl.target_bitrate; enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rate_ctrl.peak_bitrate; @@ -69,8 +67,10 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.rc_layer_init.frame_rate_den = pic->rate_ctrl.frame_rate_den; enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; enc->enc_pic.rc_layer_init.avg_target_bits_per_picture = pic->rate_ctrl.target_bits_picture; - enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer = pic->rate_ctrl.peak_bits_picture_integer; - enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = pic->rate_ctrl.peak_bits_picture_fraction; + enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer = + pic->rate_ctrl.peak_bits_picture_integer; + enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = + pic->rate_ctrl.peak_bits_picture_fraction; enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rate_ctrl.vbv_buf_lv; enc->enc_pic.rc_per_pic.qp = pic->quant_i_frames; enc->enc_pic.rc_per_pic.min_qp_app = 0; @@ -79,20 +79,21 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rate_ctrl.fill_data_enable; enc->enc_pic.rc_per_pic.skip_frame_enable = false; enc->enc_pic.rc_per_pic.enforce_hrd = pic->rate_ctrl.enforce_hrd; - switch(pic->rate_ctrl.rate_ctrl_method) { - case PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; - break; - case PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: - case PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_CBR; - break; - case PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: - case PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; - break; - default: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; + switch (pic->rate_ctrl.rate_ctrl_method) { + case PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE: + enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; + break; + case PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: + case PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT: + enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_CBR; + break; + case PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: + case PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE: + enc->enc_pic.rc_session_init.rate_control_method = + RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; + break; + default: + enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; } } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; @@ -112,37 +113,47 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = - MAX2(16, util_next_power_of_two(pic->seq.intra_period)); + enc->enc_pic.max_poc = MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc; enc->enc_pic.pic_width_in_luma_samples = pic->seq.pic_width_in_luma_samples; enc->enc_pic.pic_height_in_luma_samples = pic->seq.pic_height_in_luma_samples; - enc->enc_pic.log2_diff_max_min_luma_coding_block_size = pic->seq.log2_diff_max_min_luma_coding_block_size; - enc->enc_pic.log2_min_transform_block_size_minus2 = pic->seq.log2_min_transform_block_size_minus2; - enc->enc_pic.log2_diff_max_min_transform_block_size = pic->seq.log2_diff_max_min_transform_block_size; - enc->enc_pic.max_transform_hierarchy_depth_inter = pic->seq.max_transform_hierarchy_depth_inter; - enc->enc_pic.max_transform_hierarchy_depth_intra = pic->seq.max_transform_hierarchy_depth_intra; + enc->enc_pic.log2_diff_max_min_luma_coding_block_size = + pic->seq.log2_diff_max_min_luma_coding_block_size; + enc->enc_pic.log2_min_transform_block_size_minus2 = + pic->seq.log2_min_transform_block_size_minus2; + enc->enc_pic.log2_diff_max_min_transform_block_size = + pic->seq.log2_diff_max_min_transform_block_size; + enc->enc_pic.max_transform_hierarchy_depth_inter = + pic->seq.max_transform_hierarchy_depth_inter; + enc->enc_pic.max_transform_hierarchy_depth_intra = + pic->seq.max_transform_hierarchy_depth_intra; enc->enc_pic.log2_parallel_merge_level_minus2 = pic->pic.log2_parallel_merge_level_minus2; enc->enc_pic.bit_depth_luma_minus8 = pic->seq.bit_depth_luma_minus8; enc->enc_pic.bit_depth_chroma_minus8 = pic->seq.bit_depth_chroma_minus8; enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type; enc->enc_pic.max_num_merge_cand = pic->slice.max_num_merge_cand; - enc->enc_pic.sample_adaptive_offset_enabled_flag = pic->seq.sample_adaptive_offset_enabled_flag; + enc->enc_pic.sample_adaptive_offset_enabled_flag = + pic->seq.sample_adaptive_offset_enabled_flag; enc->enc_pic.pcm_enabled_flag = pic->seq.pcm_enabled_flag; enc->enc_pic.sps_temporal_mvp_enabled_flag = pic->seq.sps_temporal_mvp_enabled_flag; - enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled = pic->slice.slice_loop_filter_across_slices_enabled_flag; - enc->enc_pic.hevc_deblock.deblocking_filter_disabled = pic->slice.slice_deblocking_filter_disabled_flag; + enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled = + pic->slice.slice_loop_filter_across_slices_enabled_flag; + enc->enc_pic.hevc_deblock.deblocking_filter_disabled = + pic->slice.slice_deblocking_filter_disabled_flag; enc->enc_pic.hevc_deblock.beta_offset_div2 = pic->slice.slice_beta_offset_div2; enc->enc_pic.hevc_deblock.tc_offset_div2 = pic->slice.slice_tc_offset_div2; enc->enc_pic.hevc_deblock.cb_qp_offset = pic->slice.slice_cb_qp_offset; enc->enc_pic.hevc_deblock.cr_qp_offset = pic->slice.slice_cr_qp_offset; - enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 = pic->seq.log2_min_luma_coding_block_size_minus3; + enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 = + pic->seq.log2_min_luma_coding_block_size_minus3; enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag; - enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled = pic->seq.strong_intra_smoothing_enabled_flag; - enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag = pic->pic.constrained_intra_pred_flag; + enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled = + pic->seq.strong_intra_smoothing_enabled_flag; + enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag = + pic->pic.constrained_intra_pred_flag; enc->enc_pic.hevc_spec_misc.cabac_init_flag = pic->slice.cabac_init_flag; enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1; enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1; @@ -153,7 +164,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.rc_layer_init.vbv_buffer_size = pic->rc.vbv_buffer_size; enc->enc_pic.rc_layer_init.avg_target_bits_per_picture = pic->rc.target_bits_picture; enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer = pic->rc.peak_bits_picture_integer; - enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = pic->rc.peak_bits_picture_fraction; + enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional = + pic->rc.peak_bits_picture_fraction; enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv; enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames; enc->enc_pic.rc_per_pic.min_qp_app = 0; @@ -162,424 +174,420 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable; enc->enc_pic.rc_per_pic.skip_frame_enable = false; enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd; - switch(pic->rc.rate_ctrl_method) { - case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE: + switch (pic->rc.rate_ctrl_method) { + case PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE: + enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; + break; + case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: + case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT: + enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_CBR; + break; + case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: + case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE: + enc->enc_pic.rc_session_init.rate_control_method = + RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; + break; + default: enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; - break; - case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: - case PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_CBR; - break; - case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: - case PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; - break; - default: - enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; } } } static void flush(struct radeon_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); } static void radeon_enc_flush(struct pipe_video_codec *encoder) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - flush(enc); + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + flush(enc); } -static void radeon_enc_cs_flush(void *ctx, unsigned flags, - struct pipe_fence_handle **fence) +static void radeon_enc_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { - // just ignored + // just ignored } static unsigned get_cpb_num(struct radeon_encoder *enc) { - unsigned w = align(enc->base.width, 16) / 16; - unsigned h = align(enc->base.height, 16) / 16; - unsigned dpb; - - switch (enc->base.level) { - case 10: - dpb = 396; - break; - case 11: - dpb = 900; - break; - case 12: - case 13: - case 20: - dpb = 2376; - break; - case 21: - dpb = 4752; - break; - case 22: - case 30: - dpb = 8100; - break; - case 31: - dpb = 18000; - break; - case 32: - dpb = 20480; - break; - case 40: - case 41: - dpb = 32768; - break; - case 42: - dpb = 34816; - break; - case 50: - dpb = 110400; - break; - default: - case 51: - case 52: - dpb = 184320; - break; - } - - return MIN2(dpb / (w * h), 16); + unsigned w = align(enc->base.width, 16) / 16; + unsigned h = align(enc->base.height, 16) / 16; + unsigned dpb; + + switch (enc->base.level) { + case 10: + dpb = 396; + break; + case 11: + dpb = 900; + break; + case 12: + case 13: + case 20: + dpb = 2376; + break; + case 21: + dpb = 4752; + break; + case 22: + case 30: + dpb = 8100; + break; + case 31: + dpb = 18000; + break; + case 32: + dpb = 20480; + break; + case 40: + case 41: + dpb = 32768; + break; + case 42: + dpb = 34816; + break; + case 50: + dpb = 110400; + break; + default: + case 51: + case 52: + dpb = 184320; + break; + } + + return MIN2(dpb / (w * h), 16); } static void radeon_enc_begin_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; - bool need_rate_control = false; - - if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { - struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; - need_rate_control = - enc->enc_pic.rc_layer_init.target_bit_rate != pic->rate_ctrl.target_bitrate; - } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { - struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; - need_rate_control = - enc->enc_pic.rc_layer_init.target_bit_rate != pic->rc.target_bitrate; - } - - radeon_vcn_enc_get_param(enc, picture); - - enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); - enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); - - enc->need_feedback = false; - - if (!enc->stream_handle) { - struct rvid_buffer fb; - enc->stream_handle = si_vid_alloc_stream_handle(); - enc->si = CALLOC_STRUCT(rvid_buffer); - si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, PIPE_USAGE_STAGING); - si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->begin(enc); - flush(enc); - si_vid_destroy_buffer(&fb); - } - if (need_rate_control) { - enc->begin(enc); - flush(enc); - } + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; + bool need_rate_control = false; + + if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; + need_rate_control = + enc->enc_pic.rc_layer_init.target_bit_rate != pic->rate_ctrl.target_bitrate; + } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { + struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; + need_rate_control = enc->enc_pic.rc_layer_init.target_bit_rate != pic->rc.target_bitrate; + } + + radeon_vcn_enc_get_param(enc, picture); + + enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); + enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); + + enc->need_feedback = false; + + if (!enc->stream_handle) { + struct rvid_buffer fb; + enc->stream_handle = si_vid_alloc_stream_handle(); + enc->si = CALLOC_STRUCT(rvid_buffer); + si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, PIPE_USAGE_STAGING); + si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->begin(enc); + flush(enc); + si_vid_destroy_buffer(&fb); + } + if (need_rate_control) { + enc->begin(enc); + flush(enc); + } } static void radeon_enc_encode_bitstream(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_resource *destination, - void **fb) + struct pipe_video_buffer *source, + struct pipe_resource *destination, void **fb) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - enc->get_buffer(destination, &enc->bs_handle, NULL); - enc->bs_size = destination->width0; + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + enc->get_buffer(destination, &enc->bs_handle, NULL); + enc->bs_size = destination->width0; - *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); + *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); - if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't create feedback buffer.\n"); - return; - } + if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't create feedback buffer.\n"); + return; + } - enc->need_feedback = true; - enc->encode(enc); + enc->need_feedback = true; + enc->encode(enc); } -static void radeon_enc_end_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void radeon_enc_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - flush(enc); + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + flush(enc); } static void radeon_enc_destroy(struct pipe_video_codec *encoder) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - - if (enc->stream_handle) { - struct rvid_buffer fb; - enc->need_feedback = false; - si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->destroy(enc); - flush(enc); - si_vid_destroy_buffer(&fb); - } - - si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); - FREE(enc); + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + + if (enc->stream_handle) { + struct rvid_buffer fb; + enc->need_feedback = false; + si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->destroy(enc); + flush(enc); + si_vid_destroy_buffer(&fb); + } + + si_vid_destroy_buffer(&enc->cpb); + enc->ws->cs_destroy(enc->cs); + FREE(enc); } -static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, - void *feedback, unsigned *size) +static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, void *feedback, + unsigned *size) { - struct radeon_encoder *enc = (struct radeon_encoder*)encoder; - struct rvid_buffer *fb = feedback; - - if (size) { - uint32_t *ptr = enc->ws->buffer_map( - fb->res->buf, enc->cs, - PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); - if (ptr[1]) - *size = ptr[6]; - else - *size = 0; - enc->ws->buffer_unmap(fb->res->buf); - } - - si_vid_destroy_buffer(fb); - FREE(fb); + struct radeon_encoder *enc = (struct radeon_encoder *)encoder; + struct rvid_buffer *fb = feedback; + + if (size) { + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); + if (ptr[1]) + *size = ptr[6]; + else + *size = 0; + enc->ws->buffer_unmap(fb->res->buf); + } + + si_vid_destroy_buffer(fb); + FREE(fb); } struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - struct radeon_winsys* ws, - radeon_enc_get_buffer get_buffer) + const struct pipe_video_codec *templ, + struct radeon_winsys *ws, + radeon_enc_get_buffer get_buffer) { - struct si_screen *sscreen = (struct si_screen *)context->screen; - struct si_context *sctx = (struct si_context*)context; - struct radeon_encoder *enc; - struct pipe_video_buffer *tmp_buf, templat = {}; - struct radeon_surf *tmp_surf; - unsigned cpb_size; - - enc = CALLOC_STRUCT(radeon_encoder); - - if (!enc) - return NULL; - - enc->alignment = 256; - enc->base = *templ; - enc->base.context = context; - enc->base.destroy = radeon_enc_destroy; - enc->base.begin_frame = radeon_enc_begin_frame; - enc->base.encode_bitstream = radeon_enc_encode_bitstream; - enc->base.end_frame = radeon_enc_end_frame; - enc->base.flush = radeon_enc_flush; - enc->base.get_feedback = radeon_enc_get_feedback; - enc->get_buffer = get_buffer; - enc->bits_in_shifter = 0; - enc->screen = context->screen; - enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, - enc, false); - - if (!enc->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - struct rvid_buffer si; - si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING); - enc->si = &si; - - templat.buffer_format = PIPE_FORMAT_NV12; - if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - templat.buffer_format = PIPE_FORMAT_P010; - templat.width = enc->base.width; - templat.height = enc->base.height; - templat.interlaced = false; - - if (!(tmp_buf = context->create_video_buffer(context, &templat))) { - RVID_ERR("Can't create video buffer.\n"); - goto error; - } - - enc->cpb_num = get_cpb_num(enc); - - if (!enc->cpb_num) - goto error; - - get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - - cpb_size = (sscreen->info.chip_class < GFX9) ? - align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * - align(tmp_surf->u.legacy.level[0].nblk_y, 32) : - align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * - align(tmp_surf->u.gfx9.surf_height, 32); - - cpb_size = cpb_size * 3 / 2; - cpb_size = cpb_size * enc->cpb_num; - tmp_buf->destroy(tmp_buf); - - if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't create CPB buffer.\n"); - goto error; - } - - if (sscreen->info.family <= CHIP_RAVEN2) - radeon_enc_1_2_init(enc); - else - radeon_enc_2_0_init(enc); - - return &enc->base; + struct si_screen *sscreen = (struct si_screen *)context->screen; + struct si_context *sctx = (struct si_context *)context; + struct radeon_encoder *enc; + struct pipe_video_buffer *tmp_buf, templat = {}; + struct radeon_surf *tmp_surf; + unsigned cpb_size; + + enc = CALLOC_STRUCT(radeon_encoder); + + if (!enc) + return NULL; + + enc->alignment = 256; + enc->base = *templ; + enc->base.context = context; + enc->base.destroy = radeon_enc_destroy; + enc->base.begin_frame = radeon_enc_begin_frame; + enc->base.encode_bitstream = radeon_enc_encode_bitstream; + enc->base.end_frame = radeon_enc_end_frame; + enc->base.flush = radeon_enc_flush; + enc->base.get_feedback = radeon_enc_get_feedback; + enc->get_buffer = get_buffer; + enc->bits_in_shifter = 0; + enc->screen = context->screen; + enc->ws = ws; + enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc, false); + + if (!enc->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + struct rvid_buffer si; + si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING); + enc->si = &si; + + templat.buffer_format = PIPE_FORMAT_NV12; + if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + templat.buffer_format = PIPE_FORMAT_P010; + templat.width = enc->base.width; + templat.height = enc->base.height; + templat.interlaced = false; + + if (!(tmp_buf = context->create_video_buffer(context, &templat))) { + RVID_ERR("Can't create video buffer.\n"); + goto error; + } + + enc->cpb_num = get_cpb_num(enc); + + if (!enc->cpb_num) + goto error; + + get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); + + cpb_size = (sscreen->info.chip_class < GFX9) + ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32) + : align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * + align(tmp_surf->u.gfx9.surf_height, 32); + + cpb_size = cpb_size * 3 / 2; + cpb_size = cpb_size * enc->cpb_num; + tmp_buf->destroy(tmp_buf); + + if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't create CPB buffer.\n"); + goto error; + } + + if (sscreen->info.family <= CHIP_RAVEN2) + radeon_enc_1_2_init(enc); + else + radeon_enc_2_0_init(enc); + + return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + if (enc->cs) + enc->ws->cs_destroy(enc->cs); - si_vid_destroy_buffer(&enc->cpb); + si_vid_destroy_buffer(&enc->cpb); - FREE(enc); - return NULL; + FREE(enc); + return NULL; } void radeon_enc_add_buffer(struct radeon_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset) + enum radeon_bo_usage usage, enum radeon_bo_domain domain, signed offset) { - enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - uint64_t addr; - addr = enc->ws->buffer_get_virtual_address(buf); - addr = addr + offset; - RADEON_ENC_CS(addr >> 32); - RADEON_ENC_CS(addr); + enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + uint64_t addr; + addr = enc->ws->buffer_get_virtual_address(buf); + addr = addr + offset; + RADEON_ENC_CS(addr >> 32); + RADEON_ENC_CS(addr); } void radeon_enc_set_emulation_prevention(struct radeon_encoder *enc, bool set) { - if (set != enc->emulation_prevention) { - enc->emulation_prevention = set; - enc->num_zeros = 0; - } + if (set != enc->emulation_prevention) { + enc->emulation_prevention = set; + enc->num_zeros = 0; + } } void radeon_enc_output_one_byte(struct radeon_encoder *enc, unsigned char byte) { - if (enc->byte_index == 0) - enc->cs->current.buf[enc->cs->current.cdw] = 0; - enc->cs->current.buf[enc->cs->current.cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); - enc->byte_index++; - - if (enc->byte_index >= 4) { - enc->byte_index = 0; - enc->cs->current.cdw++; - } + if (enc->byte_index == 0) + enc->cs->current.buf[enc->cs->current.cdw] = 0; + enc->cs->current.buf[enc->cs->current.cdw] |= + ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); + enc->byte_index++; + + if (enc->byte_index >= 4) { + enc->byte_index = 0; + enc->cs->current.cdw++; + } } void radeon_enc_emulation_prevention(struct radeon_encoder *enc, unsigned char byte) { - if(enc->emulation_prevention) { - if((enc->num_zeros >= 2) && ((byte == 0x00) || (byte == 0x01) || (byte == 0x03))) { - radeon_enc_output_one_byte(enc, 0x03); - enc->bits_output += 8; - enc->num_zeros = 0; - } - enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0); - } + if (enc->emulation_prevention) { + if ((enc->num_zeros >= 2) && ((byte == 0x00) || (byte == 0x01) || (byte == 0x03))) { + radeon_enc_output_one_byte(enc, 0x03); + enc->bits_output += 8; + enc->num_zeros = 0; + } + enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0); + } } -void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value, unsigned int num_bits) +void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value, + unsigned int num_bits) { - unsigned int bits_to_pack = 0; - - while(num_bits > 0) { - unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits)); - bits_to_pack = num_bits > (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits; - - if (bits_to_pack < num_bits) - value_to_pack = value_to_pack >> (num_bits - bits_to_pack); - - enc->shifter |= value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack); - num_bits -= bits_to_pack; - enc->bits_in_shifter += bits_to_pack; - - while(enc->bits_in_shifter >= 8) { - unsigned char output_byte = (unsigned char)(enc->shifter >> 24); - enc->shifter <<= 8; - radeon_enc_emulation_prevention(enc, output_byte); - radeon_enc_output_one_byte(enc, output_byte); - enc->bits_in_shifter -= 8; - enc->bits_output += 8; - } - } + unsigned int bits_to_pack = 0; + + while (num_bits > 0) { + unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits)); + bits_to_pack = + num_bits > (32 - enc->bits_in_shifter) ? (32 - enc->bits_in_shifter) : num_bits; + + if (bits_to_pack < num_bits) + value_to_pack = value_to_pack >> (num_bits - bits_to_pack); + + enc->shifter |= value_to_pack << (32 - enc->bits_in_shifter - bits_to_pack); + num_bits -= bits_to_pack; + enc->bits_in_shifter += bits_to_pack; + + while (enc->bits_in_shifter >= 8) { + unsigned char output_byte = (unsigned char)(enc->shifter >> 24); + enc->shifter <<= 8; + radeon_enc_emulation_prevention(enc, output_byte); + radeon_enc_output_one_byte(enc, output_byte); + enc->bits_in_shifter -= 8; + enc->bits_output += 8; + } + } } void radeon_enc_reset(struct radeon_encoder *enc) { - enc->emulation_prevention = false; - enc->shifter = 0; - enc->bits_in_shifter = 0; - enc->bits_output = 0; - enc->num_zeros = 0; - enc->byte_index = 0; + enc->emulation_prevention = false; + enc->shifter = 0; + enc->bits_in_shifter = 0; + enc->bits_output = 0; + enc->num_zeros = 0; + enc->byte_index = 0; } void radeon_enc_byte_align(struct radeon_encoder *enc) { - unsigned int num_padding_zeros = (32 - enc->bits_in_shifter) % 8; + unsigned int num_padding_zeros = (32 - enc->bits_in_shifter) % 8; - if (num_padding_zeros > 0) - radeon_enc_code_fixed_bits(enc, 0, num_padding_zeros); + if (num_padding_zeros > 0) + radeon_enc_code_fixed_bits(enc, 0, num_padding_zeros); } void radeon_enc_flush_headers(struct radeon_encoder *enc) { - if (enc->bits_in_shifter != 0) { - unsigned char output_byte = (unsigned char)(enc->shifter >> 24); - radeon_enc_emulation_prevention(enc, output_byte); - radeon_enc_output_one_byte(enc, output_byte); - enc->bits_output += enc->bits_in_shifter; - enc->shifter = 0; - enc->bits_in_shifter = 0; - enc->num_zeros = 0; - } - - if (enc->byte_index > 0) { - enc->cs->current.cdw++; - enc->byte_index = 0; - } + if (enc->bits_in_shifter != 0) { + unsigned char output_byte = (unsigned char)(enc->shifter >> 24); + radeon_enc_emulation_prevention(enc, output_byte); + radeon_enc_output_one_byte(enc, output_byte); + enc->bits_output += enc->bits_in_shifter; + enc->shifter = 0; + enc->bits_in_shifter = 0; + enc->num_zeros = 0; + } + + if (enc->byte_index > 0) { + enc->cs->current.cdw++; + enc->byte_index = 0; + } } void radeon_enc_code_ue(struct radeon_encoder *enc, unsigned int value) { - int x = -1; - unsigned int ue_code = value + 1; - value += 1; + int x = -1; + unsigned int ue_code = value + 1; + value += 1; - while (value) { - value = (value >> 1); - x += 1; - } + while (value) { + value = (value >> 1); + x += 1; + } - unsigned int ue_length = (x << 1) + 1; - radeon_enc_code_fixed_bits(enc, ue_code, ue_length); + unsigned int ue_length = (x << 1) + 1; + radeon_enc_code_fixed_bits(enc, ue_code, ue_length); } void radeon_enc_code_se(struct radeon_encoder *enc, int value) { - unsigned int v = 0; + unsigned int v = 0; - if (value != 0) - v = (value < 0 ? ((unsigned int)(0 - value) << 1) : (((unsigned int)(value) << 1) - 1)); + if (value != 0) + v = (value < 0 ? ((unsigned int)(0 - value) << 1) : (((unsigned int)(value) << 1) - 1)); - radeon_enc_code_ue(enc, v); + radeon_enc_code_ue(enc, v); } diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h index fcf44077558..011f66b47ec 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h @@ -30,525 +30,500 @@ #include "radeon_video.h" -#define RENCODE_IB_OP_INITIALIZE 0x01000001 -#define RENCODE_IB_OP_CLOSE_SESSION 0x01000002 -#define RENCODE_IB_OP_ENCODE 0x01000003 -#define RENCODE_IB_OP_INIT_RC 0x01000004 -#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x01000005 -#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE 0x01000006 -#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE 0x01000007 -#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE 0x01000008 +#define RENCODE_IB_OP_INITIALIZE 0x01000001 +#define RENCODE_IB_OP_CLOSE_SESSION 0x01000002 +#define RENCODE_IB_OP_ENCODE 0x01000003 +#define RENCODE_IB_OP_INIT_RC 0x01000004 +#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x01000005 +#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE 0x01000006 +#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE 0x01000007 +#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE 0x01000008 -#define RENCODE_IF_MAJOR_VERSION_MASK 0xFFFF0000 -#define RENCODE_IF_MAJOR_VERSION_SHIFT 16 -#define RENCODE_IF_MINOR_VERSION_MASK 0x0000FFFF -#define RENCODE_IF_MINOR_VERSION_SHIFT 0 +#define RENCODE_IF_MAJOR_VERSION_MASK 0xFFFF0000 +#define RENCODE_IF_MAJOR_VERSION_SHIFT 16 +#define RENCODE_IF_MINOR_VERSION_MASK 0x0000FFFF +#define RENCODE_IF_MINOR_VERSION_SHIFT 0 -#define RENCODE_ENGINE_TYPE_ENCODE 1 +#define RENCODE_ENGINE_TYPE_ENCODE 1 -#define RENCODE_ENCODE_STANDARD_HEVC 0 -#define RENCODE_ENCODE_STANDARD_H264 1 +#define RENCODE_ENCODE_STANDARD_HEVC 0 +#define RENCODE_ENCODE_STANDARD_H264 1 -#define RENCODE_PREENCODE_MODE_NONE 0x00000000 -#define RENCODE_PREENCODE_MODE_1X 0x00000001 -#define RENCODE_PREENCODE_MODE_2X 0x00000002 -#define RENCODE_PREENCODE_MODE_4X 0x00000004 +#define RENCODE_PREENCODE_MODE_NONE 0x00000000 +#define RENCODE_PREENCODE_MODE_1X 0x00000001 +#define RENCODE_PREENCODE_MODE_2X 0x00000002 +#define RENCODE_PREENCODE_MODE_4X 0x00000004 -#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS 0x00000000 -#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 +#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS 0x00000000 +#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 -#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000 -#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 +#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000 +#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001 -#define RENCODE_RATE_CONTROL_METHOD_NONE 0x00000000 -#define RENCODE_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001 -#define RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002 -#define RENCODE_RATE_CONTROL_METHOD_CBR 0x00000003 +#define RENCODE_RATE_CONTROL_METHOD_NONE 0x00000000 +#define RENCODE_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR 0x00000001 +#define RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR 0x00000002 +#define RENCODE_RATE_CONTROL_METHOD_CBR 0x00000003 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_AUD 0x00000000 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_VPS 0x00000001 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS 0x00000002 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS 0x00000003 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX 0x00000004 -#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE 0x00000005 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_AUD 0x00000000 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_VPS 0x00000001 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS 0x00000002 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS 0x00000003 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX 0x00000004 +#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE 0x00000005 -#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16 -#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16 +#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16 +#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16 -#define RENCODE_HEADER_INSTRUCTION_END 0x00000000 -#define RENCODE_HEADER_INSTRUCTION_COPY 0x00000001 +#define RENCODE_HEADER_INSTRUCTION_END 0x00000000 +#define RENCODE_HEADER_INSTRUCTION_COPY 0x00000001 -#define RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 0x00010000 -#define RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE 0x00010001 -#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT 0x00010002 -#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00010003 +#define RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 0x00010000 +#define RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE 0x00010001 +#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT 0x00010002 +#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00010003 -#define RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB 0x00020000 -#define RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00020001 +#define RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB 0x00020000 +#define RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00020001 -#define RENCODE_PICTURE_TYPE_B 0 -#define RENCODE_PICTURE_TYPE_P 1 -#define RENCODE_PICTURE_TYPE_I 2 -#define RENCODE_PICTURE_TYPE_P_SKIP 3 +#define RENCODE_PICTURE_TYPE_B 0 +#define RENCODE_PICTURE_TYPE_P 1 +#define RENCODE_PICTURE_TYPE_I 2 +#define RENCODE_PICTURE_TYPE_P_SKIP 3 -#define RENCODE_INPUT_SWIZZLE_MODE_LINEAR 0 -#define RENCODE_INPUT_SWIZZLE_MODE_256B_S 1 -#define RENCODE_INPUT_SWIZZLE_MODE_4kB_S 5 -#define RENCODE_INPUT_SWIZZLE_MODE_64kB_S 9 +#define RENCODE_INPUT_SWIZZLE_MODE_LINEAR 0 +#define RENCODE_INPUT_SWIZZLE_MODE_256B_S 1 +#define RENCODE_INPUT_SWIZZLE_MODE_4kB_S 5 +#define RENCODE_INPUT_SWIZZLE_MODE_64kB_S 9 -#define RENCODE_H264_PICTURE_STRUCTURE_FRAME 0 -#define RENCODE_H264_PICTURE_STRUCTURE_TOP_FIELD 1 -#define RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD 2 +#define RENCODE_H264_PICTURE_STRUCTURE_FRAME 0 +#define RENCODE_H264_PICTURE_STRUCTURE_TOP_FIELD 1 +#define RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD 2 -#define RENCODE_H264_INTERLACING_MODE_PROGRESSIVE 0 -#define RENCODE_H264_INTERLACING_MODE_INTERLACED_STACKED 1 -#define RENCODE_H264_INTERLACING_MODE_INTERLACED_INTERLEAVED 2 +#define RENCODE_H264_INTERLACING_MODE_PROGRESSIVE 0 +#define RENCODE_H264_INTERLACING_MODE_INTERLACED_STACKED 1 +#define RENCODE_H264_INTERLACING_MODE_INTERLACED_INTERLEAVED 2 -#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_ENABLE 0 -#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISABLE 1 -#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISALBE_ACROSS_SLICE_BOUNDARY 2 +#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_ENABLE 0 +#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISABLE 1 +#define RENCODE_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISALBE_ACROSS_SLICE_BOUNDARY 2 -#define RENCODE_INTRA_REFRESH_MODE_NONE 0 -#define RENCODE_INTRA_REFRESH_MODE_CTB_MB_ROWS 1 -#define RENCODE_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2 +#define RENCODE_INTRA_REFRESH_MODE_NONE 0 +#define RENCODE_INTRA_REFRESH_MODE_CTB_MB_ROWS 1 +#define RENCODE_INTRA_REFRESH_MODE_CTB_MB_COLUMNS 2 -#define RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES 34 +#define RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES 34 -#define RENCODE_REC_SWIZZLE_MODE_LINEAR 0 -#define RENCODE_REC_SWIZZLE_MODE_256B_S 1 +#define RENCODE_REC_SWIZZLE_MODE_LINEAR 0 +#define RENCODE_REC_SWIZZLE_MODE_256B_S 1 -#define RENCODE_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0 -#define RENCODE_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1 +#define RENCODE_VIDEO_BITSTREAM_BUFFER_MODE_LINEAR 0 +#define RENCODE_VIDEO_BITSTREAM_BUFFER_MODE_CIRCULAR 1 -#define RENCODE_FEEDBACK_BUFFER_MODE_LINEAR 0 -#define RENCODE_FEEDBACK_BUFFER_MODE_CIRCULAR 1 +#define RENCODE_FEEDBACK_BUFFER_MODE_LINEAR 0 +#define RENCODE_FEEDBACK_BUFFER_MODE_CIRCULAR 1 #define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) -#define RADEON_ENC_BEGIN(cmd) { \ - uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ -RADEON_ENC_CS(cmd) -#define RADEON_ENC_READ(buf, domain, off) radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) -#define RADEON_ENC_WRITE(buf, domain, off) radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) -#define RADEON_ENC_READWRITE(buf, domain, off) radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) -#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ - enc->total_task_size += *begin;} - -typedef struct rvcn_enc_session_info_s -{ - uint32_t interface_version; - uint32_t sw_context_address_hi; - uint32_t sw_context_address_lo; +#define RADEON_ENC_BEGIN(cmd) \ + { \ + uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + RADEON_ENC_CS(cmd) +#define RADEON_ENC_READ(buf, domain, off) \ + radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) +#define RADEON_ENC_WRITE(buf, domain, off) \ + radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) +#define RADEON_ENC_READWRITE(buf, domain, off) \ + radeon_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) +#define RADEON_ENC_END() \ + *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \ + enc->total_task_size += *begin; \ + } + +typedef struct rvcn_enc_session_info_s { + uint32_t interface_version; + uint32_t sw_context_address_hi; + uint32_t sw_context_address_lo; } rvcn_enc_session_info_t; -typedef struct rvcn_enc_task_info_s -{ - uint32_t total_size_of_all_packages; - uint32_t task_id; - uint32_t allowed_max_num_feedbacks; +typedef struct rvcn_enc_task_info_s { + uint32_t total_size_of_all_packages; + uint32_t task_id; + uint32_t allowed_max_num_feedbacks; } rvcn_enc_task_info_t; -typedef struct rvcn_enc_session_init_s -{ - uint32_t encode_standard; - uint32_t aligned_picture_width; - uint32_t aligned_picture_height; - uint32_t padding_width; - uint32_t padding_height; - uint32_t pre_encode_mode; - uint32_t pre_encode_chroma_enabled; +typedef struct rvcn_enc_session_init_s { + uint32_t encode_standard; + uint32_t aligned_picture_width; + uint32_t aligned_picture_height; + uint32_t padding_width; + uint32_t padding_height; + uint32_t pre_encode_mode; + uint32_t pre_encode_chroma_enabled; } rvcn_enc_session_init_t; -typedef struct rvcn_enc_layer_control_s -{ - uint32_t max_num_temporal_layers; - uint32_t num_temporal_layers; +typedef struct rvcn_enc_layer_control_s { + uint32_t max_num_temporal_layers; + uint32_t num_temporal_layers; } rvcn_enc_layer_control_t; -typedef struct rvcn_enc_layer_select_s -{ - uint32_t temporal_layer_index; +typedef struct rvcn_enc_layer_select_s { + uint32_t temporal_layer_index; } rvcn_enc_layer_select_t; -typedef struct rvcn_enc_h264_slice_control_s -{ - uint32_t slice_control_mode; - union - { - uint32_t num_mbs_per_slice; - uint32_t num_bits_per_slice; - }; +typedef struct rvcn_enc_h264_slice_control_s { + uint32_t slice_control_mode; + union { + uint32_t num_mbs_per_slice; + uint32_t num_bits_per_slice; + }; } rvcn_enc_h264_slice_control_t; -typedef struct rvcn_enc_hevc_slice_control_s -{ - uint32_t slice_control_mode; - union - { - struct - { - uint32_t num_ctbs_per_slice; - uint32_t num_ctbs_per_slice_segment; - } fixed_ctbs_per_slice; - - struct - { - uint32_t num_bits_per_slice; - uint32_t num_bits_per_slice_segment; - } fixed_bits_per_slice; - }; +typedef struct rvcn_enc_hevc_slice_control_s { + uint32_t slice_control_mode; + union { + struct { + uint32_t num_ctbs_per_slice; + uint32_t num_ctbs_per_slice_segment; + } fixed_ctbs_per_slice; + + struct { + uint32_t num_bits_per_slice; + uint32_t num_bits_per_slice_segment; + } fixed_bits_per_slice; + }; } rvcn_enc_hevc_slice_control_t; -typedef struct rvcn_enc_h264_spec_misc_s -{ - uint32_t constrained_intra_pred_flag; - uint32_t cabac_enable; - uint32_t cabac_init_idc; - uint32_t half_pel_enabled; - uint32_t quarter_pel_enabled; - uint32_t profile_idc; - uint32_t level_idc; +typedef struct rvcn_enc_h264_spec_misc_s { + uint32_t constrained_intra_pred_flag; + uint32_t cabac_enable; + uint32_t cabac_init_idc; + uint32_t half_pel_enabled; + uint32_t quarter_pel_enabled; + uint32_t profile_idc; + uint32_t level_idc; } rvcn_enc_h264_spec_misc_t; -typedef struct rvcn_enc_hevc_spec_misc_s -{ - uint32_t log2_min_luma_coding_block_size_minus3; - uint32_t amp_disabled; - uint32_t strong_intra_smoothing_enabled; - uint32_t constrained_intra_pred_flag; - uint32_t cabac_init_flag; - uint32_t half_pel_enabled; - uint32_t quarter_pel_enabled; +typedef struct rvcn_enc_hevc_spec_misc_s { + uint32_t log2_min_luma_coding_block_size_minus3; + uint32_t amp_disabled; + uint32_t strong_intra_smoothing_enabled; + uint32_t constrained_intra_pred_flag; + uint32_t cabac_init_flag; + uint32_t half_pel_enabled; + uint32_t quarter_pel_enabled; } rvcn_enc_hevc_spec_misc_t; -typedef struct rvcn_enc_rate_ctl_session_init_s -{ - uint32_t rate_control_method; - uint32_t vbv_buffer_level; +typedef struct rvcn_enc_rate_ctl_session_init_s { + uint32_t rate_control_method; + uint32_t vbv_buffer_level; } rvcn_enc_rate_ctl_session_init_t; -typedef struct rvcn_enc_rate_ctl_layer_init_s -{ - uint32_t target_bit_rate; - uint32_t peak_bit_rate; - uint32_t frame_rate_num; - uint32_t frame_rate_den; - uint32_t vbv_buffer_size; - uint32_t avg_target_bits_per_picture; - uint32_t peak_bits_per_picture_integer; - uint32_t peak_bits_per_picture_fractional; +typedef struct rvcn_enc_rate_ctl_layer_init_s { + uint32_t target_bit_rate; + uint32_t peak_bit_rate; + uint32_t frame_rate_num; + uint32_t frame_rate_den; + uint32_t vbv_buffer_size; + uint32_t avg_target_bits_per_picture; + uint32_t peak_bits_per_picture_integer; + uint32_t peak_bits_per_picture_fractional; } rvcn_enc_rate_ctl_layer_init_t; -typedef struct rvcn_enc_rate_ctl_per_picture_s -{ - uint32_t qp; - uint32_t min_qp_app; - uint32_t max_qp_app; - uint32_t max_au_size; - uint32_t enabled_filler_data; - uint32_t skip_frame_enable; - uint32_t enforce_hrd; +typedef struct rvcn_enc_rate_ctl_per_picture_s { + uint32_t qp; + uint32_t min_qp_app; + uint32_t max_qp_app; + uint32_t max_au_size; + uint32_t enabled_filler_data; + uint32_t skip_frame_enable; + uint32_t enforce_hrd; } rvcn_enc_rate_ctl_per_picture_t; -typedef struct rvcn_enc_quality_params_s -{ - uint32_t vbaq_mode; - uint32_t scene_change_sensitivity; - uint32_t scene_change_min_idr_interval; - uint32_t two_pass_search_center_map_mode; +typedef struct rvcn_enc_quality_params_s { + uint32_t vbaq_mode; + uint32_t scene_change_sensitivity; + uint32_t scene_change_min_idr_interval; + uint32_t two_pass_search_center_map_mode; } rvcn_enc_quality_params_t; -typedef struct rvcn_enc_direct_output_nalu_s -{ - uint32_t type; - uint32_t size; - uint32_t data[1]; +typedef struct rvcn_enc_direct_output_nalu_s { + uint32_t type; + uint32_t size; + uint32_t data[1]; } rvcn_enc_direct_output_nalu_t; -typedef struct rvcn_enc_slice_header_s -{ - uint32_t bitstream_template[RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS]; - struct { - uint32_t instruction; - uint32_t num_bits; - } instructions[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS]; +typedef struct rvcn_enc_slice_header_s { + uint32_t bitstream_template[RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS]; + struct { + uint32_t instruction; + uint32_t num_bits; + } instructions[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS]; } rvcn_enc_slice_header_t; -typedef struct rvcn_enc_encode_params_s -{ - uint32_t pic_type; - uint32_t allowed_max_bitstream_size; - uint32_t input_picture_luma_address_hi; - uint32_t input_picture_luma_address_lo; - uint32_t input_picture_chroma_address_hi; - uint32_t input_picture_chroma_address_lo; - uint32_t input_pic_luma_pitch; - uint32_t input_pic_chroma_pitch; - uint8_t input_pic_swizzle_mode; - uint32_t reference_picture_index; - uint32_t reconstructed_picture_index; +typedef struct rvcn_enc_encode_params_s { + uint32_t pic_type; + uint32_t allowed_max_bitstream_size; + uint32_t input_picture_luma_address_hi; + uint32_t input_picture_luma_address_lo; + uint32_t input_picture_chroma_address_hi; + uint32_t input_picture_chroma_address_lo; + uint32_t input_pic_luma_pitch; + uint32_t input_pic_chroma_pitch; + uint8_t input_pic_swizzle_mode; + uint32_t reference_picture_index; + uint32_t reconstructed_picture_index; } rvcn_enc_encode_params_t; -typedef struct rvcn_enc_h264_encode_params_s -{ - uint32_t input_picture_structure; - uint32_t interlaced_mode; - uint32_t reference_picture_structure; - uint32_t reference_picture1_index; +typedef struct rvcn_enc_h264_encode_params_s { + uint32_t input_picture_structure; + uint32_t interlaced_mode; + uint32_t reference_picture_structure; + uint32_t reference_picture1_index; } rvcn_enc_h264_encode_params_t; -typedef struct rvcn_enc_h264_deblocking_filter_s -{ - uint32_t disable_deblocking_filter_idc; - int32_t alpha_c0_offset_div2; - int32_t beta_offset_div2; - int32_t cb_qp_offset; - int32_t cr_qp_offset; +typedef struct rvcn_enc_h264_deblocking_filter_s { + uint32_t disable_deblocking_filter_idc; + int32_t alpha_c0_offset_div2; + int32_t beta_offset_div2; + int32_t cb_qp_offset; + int32_t cr_qp_offset; } rvcn_enc_h264_deblocking_filter_t; -typedef struct rvcn_enc_hevc_deblocking_filter_s -{ - uint32_t loop_filter_across_slices_enabled; - int32_t deblocking_filter_disabled; - int32_t beta_offset_div2; - int32_t tc_offset_div2; - int32_t cb_qp_offset; - int32_t cr_qp_offset; +typedef struct rvcn_enc_hevc_deblocking_filter_s { + uint32_t loop_filter_across_slices_enabled; + int32_t deblocking_filter_disabled; + int32_t beta_offset_div2; + int32_t tc_offset_div2; + int32_t cb_qp_offset; + int32_t cr_qp_offset; } rvcn_enc_hevc_deblocking_filter_t; -typedef struct rvcn_enc_intra_refresh_s -{ - uint32_t intra_refresh_mode; - uint32_t offset; - uint32_t region_size; +typedef struct rvcn_enc_intra_refresh_s { + uint32_t intra_refresh_mode; + uint32_t offset; + uint32_t region_size; } rvcn_enc_intra_refresh_t; -typedef struct rvcn_enc_reconstructed_picture_s -{ - uint32_t luma_offset; - uint32_t chroma_offset; +typedef struct rvcn_enc_reconstructed_picture_s { + uint32_t luma_offset; + uint32_t chroma_offset; } rvcn_enc_reconstructed_picture_t; -typedef struct rvcn_enc_encode_context_buffer_s -{ - uint32_t encode_context_address_hi; - uint32_t encode_context_address_lo; - uint32_t swizzle_mode; - uint32_t rec_luma_pitch; - uint32_t rec_chroma_pitch; - uint32_t num_reconstructed_pictures; - rvcn_enc_reconstructed_picture_t reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES]; - uint32_t pre_encode_picture_luma_pitch; - uint32_t pre_encode_picture_chroma_pitch; - rvcn_enc_reconstructed_picture_t pre_encode_reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES]; - rvcn_enc_reconstructed_picture_t pre_encode_input_picture; +typedef struct rvcn_enc_encode_context_buffer_s { + uint32_t encode_context_address_hi; + uint32_t encode_context_address_lo; + uint32_t swizzle_mode; + uint32_t rec_luma_pitch; + uint32_t rec_chroma_pitch; + uint32_t num_reconstructed_pictures; + rvcn_enc_reconstructed_picture_t reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES]; + uint32_t pre_encode_picture_luma_pitch; + uint32_t pre_encode_picture_chroma_pitch; + rvcn_enc_reconstructed_picture_t + pre_encode_reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES]; + rvcn_enc_reconstructed_picture_t pre_encode_input_picture; } rvcn_enc_encode_context_buffer_t; -typedef struct rvcn_enc_video_bitstream_buffer_s -{ - uint32_t mode; - uint32_t video_bitstream_buffer_address_hi; - uint32_t video_bitstream_buffer_address_lo; - uint32_t video_bitstream_buffer_size; - uint32_t video_bitstream_data_offset; +typedef struct rvcn_enc_video_bitstream_buffer_s { + uint32_t mode; + uint32_t video_bitstream_buffer_address_hi; + uint32_t video_bitstream_buffer_address_lo; + uint32_t video_bitstream_buffer_size; + uint32_t video_bitstream_data_offset; } rvcn_enc_video_bitstream_buffer_t; -typedef struct rvcn_enc_feedback_buffer_s -{ - uint32_t mode; - uint32_t feedback_buffer_address_hi; - uint32_t feedback_buffer_address_lo; - uint32_t feedback_buffer_size; - uint32_t feedback_data_size; +typedef struct rvcn_enc_feedback_buffer_s { + uint32_t mode; + uint32_t feedback_buffer_address_hi; + uint32_t feedback_buffer_address_lo; + uint32_t feedback_buffer_size; + uint32_t feedback_data_size; } rvcn_enc_feedback_buffer_t; -typedef struct rvcn_enc_cmd_s -{ - uint32_t session_info; - uint32_t task_info; - uint32_t session_init; - uint32_t layer_control; - uint32_t layer_select; - uint32_t rc_session_init; - uint32_t rc_layer_init; - uint32_t rc_per_pic; - uint32_t quality_params; - uint32_t slice_header; - uint32_t enc_params; - uint32_t intra_refresh; - uint32_t ctx; - uint32_t bitstream; - uint32_t feedback; - uint32_t nalu; - uint32_t slice_control_hevc; - uint32_t spec_misc_hevc; - uint32_t enc_params_hevc; - uint32_t deblocking_filter_hevc; - uint32_t slice_control_h264; - uint32_t spec_misc_h264; - uint32_t enc_params_h264; - uint32_t deblocking_filter_h264; - uint32_t input_format; - uint32_t output_format; +typedef struct rvcn_enc_cmd_s { + uint32_t session_info; + uint32_t task_info; + uint32_t session_init; + uint32_t layer_control; + uint32_t layer_select; + uint32_t rc_session_init; + uint32_t rc_layer_init; + uint32_t rc_per_pic; + uint32_t quality_params; + uint32_t slice_header; + uint32_t enc_params; + uint32_t intra_refresh; + uint32_t ctx; + uint32_t bitstream; + uint32_t feedback; + uint32_t nalu; + uint32_t slice_control_hevc; + uint32_t spec_misc_hevc; + uint32_t enc_params_hevc; + uint32_t deblocking_filter_hevc; + uint32_t slice_control_h264; + uint32_t spec_misc_h264; + uint32_t enc_params_h264; + uint32_t deblocking_filter_h264; + uint32_t input_format; + uint32_t output_format; } rvcn_enc_cmd_t; -typedef void (*radeon_enc_get_buffer)(struct pipe_resource *resource, - struct pb_buffer **handle, - struct radeon_surf **surface); +typedef void (*radeon_enc_get_buffer)(struct pipe_resource *resource, struct pb_buffer **handle, + struct radeon_surf **surface); struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templat, - struct radeon_winsys* ws, - radeon_enc_get_buffer get_buffer); + const struct pipe_video_codec *templat, + struct radeon_winsys *ws, + radeon_enc_get_buffer get_buffer); struct radeon_enc_pic { - enum pipe_h264_enc_picture_type picture_type; - - unsigned frame_num; - unsigned pic_order_cnt; - unsigned pic_order_cnt_type; - unsigned ref_idx_l0; - unsigned ref_idx_l1; - unsigned crop_left; - unsigned crop_right; - unsigned crop_top; - unsigned crop_bottom; - unsigned general_tier_flag; - unsigned general_profile_idc; - unsigned general_level_idc; - unsigned max_poc; - unsigned log2_max_poc; - unsigned chroma_format_idc; - unsigned pic_width_in_luma_samples; - unsigned pic_height_in_luma_samples; - unsigned log2_diff_max_min_luma_coding_block_size; - unsigned log2_min_transform_block_size_minus2; - unsigned log2_diff_max_min_transform_block_size; - unsigned max_transform_hierarchy_depth_inter; - unsigned max_transform_hierarchy_depth_intra; - unsigned log2_parallel_merge_level_minus2; - unsigned bit_depth_luma_minus8; - unsigned bit_depth_chroma_minus8; - unsigned nal_unit_type; - unsigned max_num_merge_cand; - - bool not_referenced; - bool is_idr; - bool is_even_frame; - bool sample_adaptive_offset_enabled_flag; - bool pcm_enabled_flag; - bool sps_temporal_mvp_enabled_flag; - - rvcn_enc_session_info_t session_info; - rvcn_enc_task_info_t task_info; - rvcn_enc_session_init_t session_init; - rvcn_enc_layer_control_t layer_ctrl; - rvcn_enc_layer_select_t layer_sel; - rvcn_enc_h264_slice_control_t slice_ctrl; - rvcn_enc_hevc_slice_control_t hevc_slice_ctrl; - rvcn_enc_h264_spec_misc_t spec_misc; - rvcn_enc_hevc_spec_misc_t hevc_spec_misc; - rvcn_enc_rate_ctl_session_init_t rc_session_init; - rvcn_enc_rate_ctl_layer_init_t rc_layer_init; - rvcn_enc_h264_encode_params_t h264_enc_params; - rvcn_enc_h264_deblocking_filter_t h264_deblock; - rvcn_enc_hevc_deblocking_filter_t hevc_deblock; - rvcn_enc_rate_ctl_per_picture_t rc_per_pic; - rvcn_enc_quality_params_t quality_params; - rvcn_enc_encode_context_buffer_t ctx_buf; - rvcn_enc_video_bitstream_buffer_t bit_buf; - rvcn_enc_feedback_buffer_t fb_buf; - rvcn_enc_intra_refresh_t intra_ref; - rvcn_enc_encode_params_t enc_params; + enum pipe_h264_enc_picture_type picture_type; + + unsigned frame_num; + unsigned pic_order_cnt; + unsigned pic_order_cnt_type; + unsigned ref_idx_l0; + unsigned ref_idx_l1; + unsigned crop_left; + unsigned crop_right; + unsigned crop_top; + unsigned crop_bottom; + unsigned general_tier_flag; + unsigned general_profile_idc; + unsigned general_level_idc; + unsigned max_poc; + unsigned log2_max_poc; + unsigned chroma_format_idc; + unsigned pic_width_in_luma_samples; + unsigned pic_height_in_luma_samples; + unsigned log2_diff_max_min_luma_coding_block_size; + unsigned log2_min_transform_block_size_minus2; + unsigned log2_diff_max_min_transform_block_size; + unsigned max_transform_hierarchy_depth_inter; + unsigned max_transform_hierarchy_depth_intra; + unsigned log2_parallel_merge_level_minus2; + unsigned bit_depth_luma_minus8; + unsigned bit_depth_chroma_minus8; + unsigned nal_unit_type; + unsigned max_num_merge_cand; + + bool not_referenced; + bool is_idr; + bool is_even_frame; + bool sample_adaptive_offset_enabled_flag; + bool pcm_enabled_flag; + bool sps_temporal_mvp_enabled_flag; + + rvcn_enc_session_info_t session_info; + rvcn_enc_task_info_t task_info; + rvcn_enc_session_init_t session_init; + rvcn_enc_layer_control_t layer_ctrl; + rvcn_enc_layer_select_t layer_sel; + rvcn_enc_h264_slice_control_t slice_ctrl; + rvcn_enc_hevc_slice_control_t hevc_slice_ctrl; + rvcn_enc_h264_spec_misc_t spec_misc; + rvcn_enc_hevc_spec_misc_t hevc_spec_misc; + rvcn_enc_rate_ctl_session_init_t rc_session_init; + rvcn_enc_rate_ctl_layer_init_t rc_layer_init; + rvcn_enc_h264_encode_params_t h264_enc_params; + rvcn_enc_h264_deblocking_filter_t h264_deblock; + rvcn_enc_hevc_deblocking_filter_t hevc_deblock; + rvcn_enc_rate_ctl_per_picture_t rc_per_pic; + rvcn_enc_quality_params_t quality_params; + rvcn_enc_encode_context_buffer_t ctx_buf; + rvcn_enc_video_bitstream_buffer_t bit_buf; + rvcn_enc_feedback_buffer_t fb_buf; + rvcn_enc_intra_refresh_t intra_ref; + rvcn_enc_encode_params_t enc_params; }; struct radeon_encoder { - struct pipe_video_codec base; - - void (*begin)(struct radeon_encoder *enc); - void (*encode)(struct radeon_encoder *enc); - void (*destroy)(struct radeon_encoder *enc); - void (*session_info)(struct radeon_encoder *enc); - void (*task_info)(struct radeon_encoder *enc, bool need_feedback); - void (*session_init)(struct radeon_encoder *enc); - void (*layer_control)(struct radeon_encoder *enc); - void (*layer_select)(struct radeon_encoder *enc); - void (*slice_control)(struct radeon_encoder *enc); - void (*spec_misc)(struct radeon_encoder *enc); - void (*rc_session_init)(struct radeon_encoder *enc); - void (*rc_layer_init)(struct radeon_encoder *enc); - void (*deblocking_filter)(struct radeon_encoder *enc); - void (*quality_params)(struct radeon_encoder *enc); - void (*nalu_sps)(struct radeon_encoder *enc); - void (*nalu_pps)(struct radeon_encoder *enc); - void (*nalu_vps)(struct radeon_encoder *enc); - void (*nalu_aud)(struct radeon_encoder *enc); - void (*slice_header)(struct radeon_encoder *enc); - void (*ctx)(struct radeon_encoder *enc); - void (*bitstream)(struct radeon_encoder *enc); - void (*feedback)(struct radeon_encoder *enc); - void (*intra_refresh)(struct radeon_encoder *enc); - void (*rc_per_pic)(struct radeon_encoder *enc); - void (*encode_params)(struct radeon_encoder *enc); - void (*encode_params_codec_spec)(struct radeon_encoder *enc); - void (*op_init)(struct radeon_encoder *enc); - void (*op_close)(struct radeon_encoder *enc); - void (*op_enc)(struct radeon_encoder *enc); - void (*op_init_rc)(struct radeon_encoder *enc); - void (*op_init_rc_vbv)(struct radeon_encoder *enc); - void (*op_speed)(struct radeon_encoder *enc); - void (*encode_headers)(struct radeon_encoder *enc); - void (*input_format)(struct radeon_encoder *enc); - void (*output_format)(struct radeon_encoder *enc); - - unsigned stream_handle; - - struct pipe_screen *screen; - struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; - - radeon_enc_get_buffer get_buffer; - - struct pb_buffer* handle; - struct radeon_surf* luma; - struct radeon_surf* chroma; - - struct pb_buffer* bs_handle; - unsigned bs_size; - - unsigned cpb_num; - - struct rvid_buffer *si; - struct rvid_buffer *fb; - struct rvid_buffer cpb; - struct radeon_enc_pic enc_pic; - rvcn_enc_cmd_t cmd; - - unsigned alignment; - unsigned shifter; - unsigned bits_in_shifter; - unsigned num_zeros; - unsigned byte_index; - unsigned bits_output; - uint32_t total_task_size; - uint32_t* p_task_size; - - bool emulation_prevention; - bool need_feedback; + struct pipe_video_codec base; + + void (*begin)(struct radeon_encoder *enc); + void (*encode)(struct radeon_encoder *enc); + void (*destroy)(struct radeon_encoder *enc); + void (*session_info)(struct radeon_encoder *enc); + void (*task_info)(struct radeon_encoder *enc, bool need_feedback); + void (*session_init)(struct radeon_encoder *enc); + void (*layer_control)(struct radeon_encoder *enc); + void (*layer_select)(struct radeon_encoder *enc); + void (*slice_control)(struct radeon_encoder *enc); + void (*spec_misc)(struct radeon_encoder *enc); + void (*rc_session_init)(struct radeon_encoder *enc); + void (*rc_layer_init)(struct radeon_encoder *enc); + void (*deblocking_filter)(struct radeon_encoder *enc); + void (*quality_params)(struct radeon_encoder *enc); + void (*nalu_sps)(struct radeon_encoder *enc); + void (*nalu_pps)(struct radeon_encoder *enc); + void (*nalu_vps)(struct radeon_encoder *enc); + void (*nalu_aud)(struct radeon_encoder *enc); + void (*slice_header)(struct radeon_encoder *enc); + void (*ctx)(struct radeon_encoder *enc); + void (*bitstream)(struct radeon_encoder *enc); + void (*feedback)(struct radeon_encoder *enc); + void (*intra_refresh)(struct radeon_encoder *enc); + void (*rc_per_pic)(struct radeon_encoder *enc); + void (*encode_params)(struct radeon_encoder *enc); + void (*encode_params_codec_spec)(struct radeon_encoder *enc); + void (*op_init)(struct radeon_encoder *enc); + void (*op_close)(struct radeon_encoder *enc); + void (*op_enc)(struct radeon_encoder *enc); + void (*op_init_rc)(struct radeon_encoder *enc); + void (*op_init_rc_vbv)(struct radeon_encoder *enc); + void (*op_speed)(struct radeon_encoder *enc); + void (*encode_headers)(struct radeon_encoder *enc); + void (*input_format)(struct radeon_encoder *enc); + void (*output_format)(struct radeon_encoder *enc); + + unsigned stream_handle; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + radeon_enc_get_buffer get_buffer; + + struct pb_buffer *handle; + struct radeon_surf *luma; + struct radeon_surf *chroma; + + struct pb_buffer *bs_handle; + unsigned bs_size; + + unsigned cpb_num; + + struct rvid_buffer *si; + struct rvid_buffer *fb; + struct rvid_buffer cpb; + struct radeon_enc_pic enc_pic; + rvcn_enc_cmd_t cmd; + + unsigned alignment; + unsigned shifter; + unsigned bits_in_shifter; + unsigned num_zeros; + unsigned byte_index; + unsigned bits_output; + uint32_t total_task_size; + uint32_t *p_task_size; + + bool emulation_prevention; + bool need_feedback; }; void radeon_enc_add_buffer(struct radeon_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset); + enum radeon_bo_usage usage, enum radeon_bo_domain domain, signed offset); void radeon_enc_set_emulation_prevention(struct radeon_encoder *enc, bool set); void radeon_enc_output_one_byte(struct radeon_encoder *enc, unsigned char byte); -void radeon_enc_emulation_prevention(struct radeon_encoder *enc, - unsigned char byte); +void radeon_enc_emulation_prevention(struct radeon_encoder *enc, unsigned char byte); void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value, - unsigned int num_bits); + unsigned int num_bits); void radeon_enc_reset(struct radeon_encoder *enc); @@ -564,4 +539,4 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc); void radeon_enc_2_0_init(struct radeon_encoder *enc); -#endif // _RADEON_VCN_ENC_H +#endif // _RADEON_VCN_ENC_H diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c index c125fe49d55..7607987f0d5 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c @@ -25,1202 +25,1217 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - +#include "radeon_vcn_enc.h" +#include "radeon_video.h" +#include "si_pipe.h" #include "util/u_video.h" -#include "si_pipe.h" -#include "radeon_video.h" -#include "radeon_vcn_enc.h" +#include <stdio.h> -#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1 -#define RENCODE_FW_INTERFACE_MINOR_VERSION 2 - -#define RENCODE_IB_PARAM_SESSION_INFO 0x00000001 -#define RENCODE_IB_PARAM_TASK_INFO 0x00000002 -#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_LAYER_CONTROL 0x00000004 -#define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 -#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 -#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 -#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 -#define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 -#define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000a -#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x0000000b -#define RENCODE_IB_PARAM_INTRA_REFRESH 0x0000000c -#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x0000000d -#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x0000000e -#define RENCODE_IB_PARAM_FEEDBACK_BUFFER 0x00000010 -#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x00000020 - -#define RENCODE_HEVC_IB_PARAM_SLICE_CONTROL 0x00100001 -#define RENCODE_HEVC_IB_PARAM_SPEC_MISC 0x00100002 -#define RENCODE_HEVC_IB_PARAM_DEBLOCKING_FILTER 0x00100003 - -#define RENCODE_H264_IB_PARAM_SLICE_CONTROL 0x00200001 -#define RENCODE_H264_IB_PARAM_SPEC_MISC 0x00200002 -#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS 0x00200003 -#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER 0x00200004 +#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1 +#define RENCODE_FW_INTERFACE_MINOR_VERSION 2 + +#define RENCODE_IB_PARAM_SESSION_INFO 0x00000001 +#define RENCODE_IB_PARAM_TASK_INFO 0x00000002 +#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 +#define RENCODE_IB_PARAM_LAYER_CONTROL 0x00000004 +#define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 +#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 +#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 +#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 +#define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 +#define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000a +#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x0000000b +#define RENCODE_IB_PARAM_INTRA_REFRESH 0x0000000c +#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x0000000d +#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x0000000e +#define RENCODE_IB_PARAM_FEEDBACK_BUFFER 0x00000010 +#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x00000020 + +#define RENCODE_HEVC_IB_PARAM_SLICE_CONTROL 0x00100001 +#define RENCODE_HEVC_IB_PARAM_SPEC_MISC 0x00100002 +#define RENCODE_HEVC_IB_PARAM_DEBLOCKING_FILTER 0x00100003 + +#define RENCODE_H264_IB_PARAM_SLICE_CONTROL 0x00200001 +#define RENCODE_H264_IB_PARAM_SPEC_MISC 0x00200002 +#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS 0x00200003 +#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER 0x00200004 static void radeon_enc_session_info(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.session_info); - RADEON_ENC_CS(enc->enc_pic.session_info.interface_version); - RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0); - RADEON_ENC_CS(RENCODE_ENGINE_TYPE_ENCODE); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.session_info); + RADEON_ENC_CS(enc->enc_pic.session_info.interface_version); + RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0); + RADEON_ENC_CS(RENCODE_ENGINE_TYPE_ENCODE); + RADEON_ENC_END(); } static void radeon_enc_task_info(struct radeon_encoder *enc, bool need_feedback) { - enc->enc_pic.task_info.task_id++; - - if (need_feedback) - enc->enc_pic.task_info.allowed_max_num_feedbacks = 1; - else - enc->enc_pic.task_info.allowed_max_num_feedbacks = 0; - - RADEON_ENC_BEGIN(enc->cmd.task_info); - enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++]; - RADEON_ENC_CS(enc->enc_pic.task_info.task_id); - RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks); - RADEON_ENC_END(); + enc->enc_pic.task_info.task_id++; + + if (need_feedback) + enc->enc_pic.task_info.allowed_max_num_feedbacks = 1; + else + enc->enc_pic.task_info.allowed_max_num_feedbacks = 0; + + RADEON_ENC_BEGIN(enc->cmd.task_info); + enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++]; + RADEON_ENC_CS(enc->enc_pic.task_info.task_id); + RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks); + RADEON_ENC_END(); } static void radeon_enc_session_init(struct radeon_encoder *enc) { - enc->enc_pic.session_init.encode_standard = RENCODE_ENCODE_STANDARD_H264; - enc->enc_pic.session_init.aligned_picture_width = align(enc->base.width, 16); - enc->enc_pic.session_init.aligned_picture_height = align(enc->base.height, 16); - enc->enc_pic.session_init.padding_width = enc->enc_pic.session_init.aligned_picture_width - enc->base.width; - enc->enc_pic.session_init.padding_height = enc->enc_pic.session_init.aligned_picture_height - enc->base.height; - enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE; - enc->enc_pic.session_init.pre_encode_chroma_enabled = false; - - RADEON_ENC_BEGIN(enc->cmd.session_init); - RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard); - RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width); - RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height); - RADEON_ENC_CS(enc->enc_pic.session_init.padding_width); - RADEON_ENC_CS(enc->enc_pic.session_init.padding_height); - RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode); - RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled); - RADEON_ENC_END(); + enc->enc_pic.session_init.encode_standard = RENCODE_ENCODE_STANDARD_H264; + enc->enc_pic.session_init.aligned_picture_width = align(enc->base.width, 16); + enc->enc_pic.session_init.aligned_picture_height = align(enc->base.height, 16); + enc->enc_pic.session_init.padding_width = + enc->enc_pic.session_init.aligned_picture_width - enc->base.width; + enc->enc_pic.session_init.padding_height = + enc->enc_pic.session_init.aligned_picture_height - enc->base.height; + enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE; + enc->enc_pic.session_init.pre_encode_chroma_enabled = false; + + RADEON_ENC_BEGIN(enc->cmd.session_init); + RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard); + RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width); + RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height); + RADEON_ENC_CS(enc->enc_pic.session_init.padding_width); + RADEON_ENC_CS(enc->enc_pic.session_init.padding_height); + RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode); + RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled); + RADEON_ENC_END(); } static void radeon_enc_session_init_hevc(struct radeon_encoder *enc) { - enc->enc_pic.session_init.encode_standard = RENCODE_ENCODE_STANDARD_HEVC; - enc->enc_pic.session_init.aligned_picture_width = align(enc->base.width, 64); - enc->enc_pic.session_init.aligned_picture_height = align(enc->base.height, 16); - enc->enc_pic.session_init.padding_width = enc->enc_pic.session_init.aligned_picture_width - enc->base.width; - enc->enc_pic.session_init.padding_height = enc->enc_pic.session_init.aligned_picture_height - enc->base.height; - enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE; - enc->enc_pic.session_init.pre_encode_chroma_enabled = false; - - RADEON_ENC_BEGIN(enc->cmd.session_init); - RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard); - RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width); - RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height); - RADEON_ENC_CS(enc->enc_pic.session_init.padding_width); - RADEON_ENC_CS(enc->enc_pic.session_init.padding_height); - RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode); - RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled); - RADEON_ENC_END(); + enc->enc_pic.session_init.encode_standard = RENCODE_ENCODE_STANDARD_HEVC; + enc->enc_pic.session_init.aligned_picture_width = align(enc->base.width, 64); + enc->enc_pic.session_init.aligned_picture_height = align(enc->base.height, 16); + enc->enc_pic.session_init.padding_width = + enc->enc_pic.session_init.aligned_picture_width - enc->base.width; + enc->enc_pic.session_init.padding_height = + enc->enc_pic.session_init.aligned_picture_height - enc->base.height; + enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE; + enc->enc_pic.session_init.pre_encode_chroma_enabled = false; + + RADEON_ENC_BEGIN(enc->cmd.session_init); + RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard); + RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width); + RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height); + RADEON_ENC_CS(enc->enc_pic.session_init.padding_width); + RADEON_ENC_CS(enc->enc_pic.session_init.padding_height); + RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode); + RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled); + RADEON_ENC_END(); } static void radeon_enc_layer_control(struct radeon_encoder *enc) { - enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1; - enc->enc_pic.layer_ctrl.num_temporal_layers = 1; + enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1; + enc->enc_pic.layer_ctrl.num_temporal_layers = 1; - RADEON_ENC_BEGIN(enc->cmd.layer_control); - RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers); - RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.layer_control); + RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers); + RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers); + RADEON_ENC_END(); } static void radeon_enc_layer_select(struct radeon_encoder *enc) { - enc->enc_pic.layer_sel.temporal_layer_index = 0; + enc->enc_pic.layer_sel.temporal_layer_index = 0; - RADEON_ENC_BEGIN(enc->cmd.layer_select); - RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.layer_select); + RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index); + RADEON_ENC_END(); } static void radeon_enc_slice_control(struct radeon_encoder *enc) { - enc->enc_pic.slice_ctrl.slice_control_mode = RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS; - enc->enc_pic.slice_ctrl.num_mbs_per_slice = align(enc->base.width, 16) / 16 * align(enc->base.height, 16) / 16; - - RADEON_ENC_BEGIN(enc->cmd.slice_control_h264); - RADEON_ENC_CS(enc->enc_pic.slice_ctrl.slice_control_mode); - RADEON_ENC_CS(enc->enc_pic.slice_ctrl.num_mbs_per_slice); - RADEON_ENC_END(); + enc->enc_pic.slice_ctrl.slice_control_mode = RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS; + enc->enc_pic.slice_ctrl.num_mbs_per_slice = + align(enc->base.width, 16) / 16 * align(enc->base.height, 16) / 16; + + RADEON_ENC_BEGIN(enc->cmd.slice_control_h264); + RADEON_ENC_CS(enc->enc_pic.slice_ctrl.slice_control_mode); + RADEON_ENC_CS(enc->enc_pic.slice_ctrl.num_mbs_per_slice); + RADEON_ENC_END(); } static void radeon_enc_slice_control_hevc(struct radeon_encoder *enc) { - enc->enc_pic.hevc_slice_ctrl.slice_control_mode = RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS; - enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice = align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64; - enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment = enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice; - - RADEON_ENC_BEGIN(enc->cmd.slice_control_hevc); - RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode); - RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice); - RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment); - RADEON_ENC_END(); + enc->enc_pic.hevc_slice_ctrl.slice_control_mode = RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS; + enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice = + align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64; + enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment = + enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice; + + RADEON_ENC_BEGIN(enc->cmd.slice_control_hevc); + RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode); + RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice); + RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment); + RADEON_ENC_END(); } static void radeon_enc_spec_misc(struct radeon_encoder *enc) { - enc->enc_pic.spec_misc.constrained_intra_pred_flag = 0; - enc->enc_pic.spec_misc.cabac_enable = 0; - enc->enc_pic.spec_misc.cabac_init_idc = 0; - enc->enc_pic.spec_misc.half_pel_enabled = 1; - enc->enc_pic.spec_misc.quarter_pel_enabled = 1; - enc->enc_pic.spec_misc.profile_idc = u_get_h264_profile_idc(enc->base.profile); - enc->enc_pic.spec_misc.level_idc = enc->base.level; - - RADEON_ENC_BEGIN(enc->cmd.spec_misc_h264); - RADEON_ENC_CS(enc->enc_pic.spec_misc.constrained_intra_pred_flag); - RADEON_ENC_CS(enc->enc_pic.spec_misc.cabac_enable); - RADEON_ENC_CS(enc->enc_pic.spec_misc.cabac_init_idc); - RADEON_ENC_CS(enc->enc_pic.spec_misc.half_pel_enabled); - RADEON_ENC_CS(enc->enc_pic.spec_misc.quarter_pel_enabled); - RADEON_ENC_CS(enc->enc_pic.spec_misc.profile_idc); - RADEON_ENC_CS(enc->enc_pic.spec_misc.level_idc); - RADEON_ENC_END(); + enc->enc_pic.spec_misc.constrained_intra_pred_flag = 0; + enc->enc_pic.spec_misc.cabac_enable = 0; + enc->enc_pic.spec_misc.cabac_init_idc = 0; + enc->enc_pic.spec_misc.half_pel_enabled = 1; + enc->enc_pic.spec_misc.quarter_pel_enabled = 1; + enc->enc_pic.spec_misc.profile_idc = u_get_h264_profile_idc(enc->base.profile); + enc->enc_pic.spec_misc.level_idc = enc->base.level; + + RADEON_ENC_BEGIN(enc->cmd.spec_misc_h264); + RADEON_ENC_CS(enc->enc_pic.spec_misc.constrained_intra_pred_flag); + RADEON_ENC_CS(enc->enc_pic.spec_misc.cabac_enable); + RADEON_ENC_CS(enc->enc_pic.spec_misc.cabac_init_idc); + RADEON_ENC_CS(enc->enc_pic.spec_misc.half_pel_enabled); + RADEON_ENC_CS(enc->enc_pic.spec_misc.quarter_pel_enabled); + RADEON_ENC_CS(enc->enc_pic.spec_misc.profile_idc); + RADEON_ENC_CS(enc->enc_pic.spec_misc.level_idc); + RADEON_ENC_END(); } static void radeon_enc_spec_misc_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.spec_misc_hevc); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled); - RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.spec_misc_hevc); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled); + RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled); + RADEON_ENC_END(); } static void radeon_enc_rc_session_init(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.rc_session_init); - RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method); - RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.rc_session_init); + RADEON_ENC_CS(enc->enc_pic.rc_session_init.rate_control_method); + RADEON_ENC_CS(enc->enc_pic.rc_session_init.vbv_buffer_level); + RADEON_ENC_END(); } static void radeon_enc_rc_layer_init(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.rc_layer_init); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer); - RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.rc_layer_init); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.target_bit_rate); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bit_rate); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_num); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.frame_rate_den); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.vbv_buffer_size); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.avg_target_bits_per_picture); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_integer); + RADEON_ENC_CS(enc->enc_pic.rc_layer_init.peak_bits_per_picture_fractional); + RADEON_ENC_END(); } static void radeon_enc_deblocking_filter_h264(struct radeon_encoder *enc) { - enc->enc_pic.h264_deblock.disable_deblocking_filter_idc = 0; - enc->enc_pic.h264_deblock.alpha_c0_offset_div2 = 0; - enc->enc_pic.h264_deblock.beta_offset_div2 = 0; - enc->enc_pic.h264_deblock.cb_qp_offset = 0; - enc->enc_pic.h264_deblock.cr_qp_offset = 0; - - RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_h264); - RADEON_ENC_CS(enc->enc_pic.h264_deblock.disable_deblocking_filter_idc); - RADEON_ENC_CS(enc->enc_pic.h264_deblock.alpha_c0_offset_div2); - RADEON_ENC_CS(enc->enc_pic.h264_deblock.beta_offset_div2); - RADEON_ENC_CS(enc->enc_pic.h264_deblock.cb_qp_offset); - RADEON_ENC_CS(enc->enc_pic.h264_deblock.cr_qp_offset); - RADEON_ENC_END(); + enc->enc_pic.h264_deblock.disable_deblocking_filter_idc = 0; + enc->enc_pic.h264_deblock.alpha_c0_offset_div2 = 0; + enc->enc_pic.h264_deblock.beta_offset_div2 = 0; + enc->enc_pic.h264_deblock.cb_qp_offset = 0; + enc->enc_pic.h264_deblock.cr_qp_offset = 0; + + RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_h264); + RADEON_ENC_CS(enc->enc_pic.h264_deblock.disable_deblocking_filter_idc); + RADEON_ENC_CS(enc->enc_pic.h264_deblock.alpha_c0_offset_div2); + RADEON_ENC_CS(enc->enc_pic.h264_deblock.beta_offset_div2); + RADEON_ENC_CS(enc->enc_pic.h264_deblock.cb_qp_offset); + RADEON_ENC_CS(enc->enc_pic.h264_deblock.cr_qp_offset); + RADEON_ENC_END(); } static void radeon_enc_deblocking_filter_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_hevc); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_hevc); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset); + RADEON_ENC_END(); } static void radeon_enc_quality_params(struct radeon_encoder *enc) { - enc->enc_pic.quality_params.vbaq_mode = 0; - enc->enc_pic.quality_params.scene_change_sensitivity = 0; - enc->enc_pic.quality_params.scene_change_min_idr_interval = 0; - - RADEON_ENC_BEGIN(enc->cmd.quality_params); - RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode); - RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity); - RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval); - RADEON_ENC_END(); + enc->enc_pic.quality_params.vbaq_mode = 0; + enc->enc_pic.quality_params.scene_change_sensitivity = 0; + enc->enc_pic.quality_params.scene_change_min_idr_interval = 0; + + RADEON_ENC_BEGIN(enc->cmd.quality_params); + RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode); + RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity); + RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval); + RADEON_ENC_END(); } static void radeon_enc_nalu_sps(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x67, 8); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8); - radeon_enc_code_fixed_bits(enc, 0x44, 8); //hardcode to constrained baseline - radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8); - radeon_enc_code_ue(enc, 0x0); - - if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 || - enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 || - enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 || - enc->enc_pic.spec_misc.profile_idc == 138) { - radeon_enc_code_ue(enc, 0x1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - } - - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type); - - if (enc->enc_pic.pic_order_cnt_type == 0) - radeon_enc_code_ue(enc, 1); - - radeon_enc_code_ue(enc, (enc->base.max_references + 1)); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1); - radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1)); - radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1)); - bool progressive_only = true; - radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1); - - if (!progressive_only) - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) || - (enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) { - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, enc->enc_pic.crop_left); - radeon_enc_code_ue(enc, enc->enc_pic.crop_right); - radeon_enc_code_ue(enc, enc->enc_pic.crop_top); - radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom); - } else - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 16); - radeon_enc_code_ue(enc, 16); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, (enc->base.max_references + 1)); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x67, 8); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8); + radeon_enc_code_fixed_bits(enc, 0x44, 8); // hardcode to constrained baseline + radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8); + radeon_enc_code_ue(enc, 0x0); + + if (enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || + enc->enc_pic.spec_misc.profile_idc == 122 || enc->enc_pic.spec_misc.profile_idc == 244 || + enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 || + enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || + enc->enc_pic.spec_misc.profile_idc == 128 || enc->enc_pic.spec_misc.profile_idc == 138) { + radeon_enc_code_ue(enc, 0x1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + } + + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type); + + if (enc->enc_pic.pic_order_cnt_type == 0) + radeon_enc_code_ue(enc, 1); + + radeon_enc_code_ue(enc, (enc->base.max_references + 1)); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, + 1); + radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1)); + radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1)); + bool progressive_only = true; + radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1); + + if (!progressive_only) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) || + (enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, enc->enc_pic.crop_left); + radeon_enc_code_ue(enc, enc->enc_pic.crop_right); + radeon_enc_code_ue(enc, enc->enc_pic.crop_top); + radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom); + } else + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 16); + radeon_enc_code_ue(enc, 16); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, (enc->base.max_references + 1)); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - int i; - - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x4201, 16); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_fixed_bits(enc, 0x0, 4); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); - radeon_enc_code_fixed_bits(enc, 0x60000000, 32); - radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); - radeon_enc_code_fixed_bits(enc, 0x0, 16); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); - - for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) ; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - - if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { - for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - } - - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc); - radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_width); - radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_height); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8); - radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8); - radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); - //Only support CTBSize 64 - radeon_enc_code_ue(enc, 6 - (enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3)); - radeon_enc_code_ue(enc, enc->enc_pic.log2_min_transform_block_size_minus2); - radeon_enc_code_ue(enc, enc->enc_pic.log2_diff_max_min_transform_block_size); - radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_inter); - radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_intra); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, !enc->enc_pic.hevc_spec_misc.amp_disabled, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.sample_adaptive_offset_enabled_flag, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); - - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 0); - radeon_enc_code_ue(enc, 0); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + int i; + + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x4201, 16); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_fixed_bits(enc, 0x0, 4); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); + radeon_enc_code_fixed_bits(enc, 0x60000000, 32); + radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); + radeon_enc_code_fixed_bits(enc, 0x0, 16); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); + + for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + + if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { + for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + } + + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc); + radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_width); + radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_height); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8); + radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8); + radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); + // Only support CTBSize 64 + radeon_enc_code_ue(enc, + 6 - (enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3)); + radeon_enc_code_ue(enc, enc->enc_pic.log2_min_transform_block_size_minus2); + radeon_enc_code_ue(enc, enc->enc_pic.log2_diff_max_min_transform_block_size); + radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_inter); + radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_intra); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, !enc->enc_pic.hevc_spec_misc.amp_disabled, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.sample_adaptive_offset_enabled_flag, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); + + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 0); + radeon_enc_code_ue(enc, 0); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_pps(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x68, 8); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_se(enc, 0x0); - radeon_enc_code_se(enc, 0x0); - radeon_enc_code_se(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x68, 8); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_se(enc, 0x0); + radeon_enc_code_se(enc, 0x0); + radeon_enc_code_se(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x4401, 16); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 4); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_se(enc, 0x0); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - if (enc->enc_pic.rc_session_init.rate_control_method == - RENCODE_RATE_CONTROL_METHOD_NONE) - radeon_enc_code_fixed_bits(enc, 0x0, 1); - else { - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - } - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.deblocking_filter_disabled, 1); - - if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2); - } - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x4401, 16); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 4); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_se(enc, 0x0); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == RENCODE_RATE_CONTROL_METHOD_NONE) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + } + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.deblocking_filter_disabled, 1); + + if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2); + } + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_vps(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_VPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - int i; - - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x4001, 16); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - - radeon_enc_code_fixed_bits(enc, 0x0, 4); - radeon_enc_code_fixed_bits(enc, 0x3, 2); - radeon_enc_code_fixed_bits(enc, 0x0, 6); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0xffff, 16); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); - radeon_enc_code_fixed_bits(enc, 0x60000000, 32); - radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); - radeon_enc_code_fixed_bits(enc, 0x0, 16); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); - - for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) ; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - - if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { - for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - } - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, 0x1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - - radeon_enc_code_fixed_bits(enc, 0x0, 6); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_VPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + int i; + + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x4001, 16); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + + radeon_enc_code_fixed_bits(enc, 0x0, 4); + radeon_enc_code_fixed_bits(enc, 0x3, 2); + radeon_enc_code_fixed_bits(enc, 0x0, 6); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0xffff, 16); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); + radeon_enc_code_fixed_bits(enc, 0x60000000, 32); + radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); + radeon_enc_code_fixed_bits(enc, 0x0, 16); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); + + for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + + if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { + for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + } + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 0x1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + + radeon_enc_code_fixed_bits(enc, 0x0, 6); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_aud_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_AUD); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 35, 6); - radeon_enc_code_fixed_bits(enc, 0x0, 6); - radeon_enc_code_fixed_bits(enc, 0x1, 3); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - switch(enc->enc_pic.picture_type) { - case PIPE_H265_ENC_PICTURE_TYPE_I: - case PIPE_H265_ENC_PICTURE_TYPE_IDR: - radeon_enc_code_fixed_bits(enc, 0x00, 3); - break; - case PIPE_H265_ENC_PICTURE_TYPE_P: - radeon_enc_code_fixed_bits(enc, 0x01, 3); - break; - case PIPE_H265_ENC_PICTURE_TYPE_B: - radeon_enc_code_fixed_bits(enc, 0x02, 3); - break; - default: - radeon_enc_code_fixed_bits(enc, 0x02, 3); - } - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_AUD); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 35, 6); + radeon_enc_code_fixed_bits(enc, 0x0, 6); + radeon_enc_code_fixed_bits(enc, 0x1, 3); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + switch (enc->enc_pic.picture_type) { + case PIPE_H265_ENC_PICTURE_TYPE_I: + case PIPE_H265_ENC_PICTURE_TYPE_IDR: + radeon_enc_code_fixed_bits(enc, 0x00, 3); + break; + case PIPE_H265_ENC_PICTURE_TYPE_P: + radeon_enc_code_fixed_bits(enc, 0x01, 3); + break; + case PIPE_H265_ENC_PICTURE_TYPE_B: + radeon_enc_code_fixed_bits(enc, 0x02, 3); + break; + default: + radeon_enc_code_fixed_bits(enc, 0x02, 3); + } + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_slice_header(struct radeon_encoder *enc) { - uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; - uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; - unsigned int inst_index = 0; - unsigned int bit_index = 0; - unsigned int bits_copied = 0; - RADEON_ENC_BEGIN(enc->cmd.slice_header); - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - - if (enc->enc_pic.is_idr) - radeon_enc_code_fixed_bits(enc, 0x65, 8); - else if (enc->enc_pic.not_referenced) - radeon_enc_code_fixed_bits(enc, 0x01, 8); - else - radeon_enc_code_fixed_bits(enc, 0x41, 8); - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB; - inst_index++; - - switch(enc->enc_pic.picture_type) { - case PIPE_H264_ENC_PICTURE_TYPE_I: - case PIPE_H264_ENC_PICTURE_TYPE_IDR: - radeon_enc_code_fixed_bits(enc, 0x08, 7); - break; - case PIPE_H264_ENC_PICTURE_TYPE_P: - case PIPE_H264_ENC_PICTURE_TYPE_SKIP: - radeon_enc_code_fixed_bits(enc, 0x06, 5); - break; - case PIPE_H264_ENC_PICTURE_TYPE_B: - radeon_enc_code_fixed_bits(enc, 0x07, 5); - break; - default: - radeon_enc_code_fixed_bits(enc, 0x08, 7); - } - - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5); - - if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) { - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1); - } - - if (enc->enc_pic.is_idr) - radeon_enc_code_ue(enc, enc->enc_pic.is_even_frame); - - enc->enc_pic.is_even_frame = !enc->enc_pic.is_even_frame; - - if (enc->enc_pic.pic_order_cnt_type == 0) - radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5); - - if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) { - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) { - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1)); - radeon_enc_code_ue(enc, 0x3); - } else - radeon_enc_code_fixed_bits(enc, 0x0, 1); - } - - if (enc->enc_pic.is_idr) { - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - } else - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable)) - radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc); - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA; - inst_index++; - - radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0); - - if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) { - radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2); - radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2); - } - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - - for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) - RADEON_ENC_CS(0x00000000); - - for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { - RADEON_ENC_CS(instruction[j]); - RADEON_ENC_CS(num_bits[j]); - } - - RADEON_ENC_END(); + uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + unsigned int inst_index = 0; + unsigned int bit_index = 0; + unsigned int bits_copied = 0; + RADEON_ENC_BEGIN(enc->cmd.slice_header); + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + + if (enc->enc_pic.is_idr) + radeon_enc_code_fixed_bits(enc, 0x65, 8); + else if (enc->enc_pic.not_referenced) + radeon_enc_code_fixed_bits(enc, 0x01, 8); + else + radeon_enc_code_fixed_bits(enc, 0x41, 8); + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB; + inst_index++; + + switch (enc->enc_pic.picture_type) { + case PIPE_H264_ENC_PICTURE_TYPE_I: + case PIPE_H264_ENC_PICTURE_TYPE_IDR: + radeon_enc_code_fixed_bits(enc, 0x08, 7); + break; + case PIPE_H264_ENC_PICTURE_TYPE_P: + case PIPE_H264_ENC_PICTURE_TYPE_SKIP: + radeon_enc_code_fixed_bits(enc, 0x06, 5); + break; + case PIPE_H264_ENC_PICTURE_TYPE_B: + radeon_enc_code_fixed_bits(enc, 0x07, 5); + break; + default: + radeon_enc_code_fixed_bits(enc, 0x08, 7); + } + + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5); + + if (enc->enc_pic.h264_enc_params.input_picture_structure != + RENCODE_H264_PICTURE_STRUCTURE_FRAME) { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, + enc->enc_pic.h264_enc_params.input_picture_structure == + RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD + ? 1 + : 0, + 1); + } + + if (enc->enc_pic.is_idr) + radeon_enc_code_ue(enc, enc->enc_pic.is_even_frame); + + enc->enc_pic.is_even_frame = !enc->enc_pic.is_even_frame; + + if (enc->enc_pic.pic_order_cnt_type == 0) + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5); + + if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1)); + radeon_enc_code_ue(enc, 0x3); + } else + radeon_enc_code_fixed_bits(enc, 0x0, 1); + } + + if (enc->enc_pic.is_idr) { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + } else + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && + (enc->enc_pic.spec_misc.cabac_enable)) + radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc); + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA; + inst_index++; + + radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1 : 0); + + if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) { + radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2); + radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2); + } + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; + + for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) + RADEON_ENC_CS(0x00000000); + + for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { + RADEON_ENC_CS(instruction[j]); + RADEON_ENC_CS(num_bits[j]); + } + + RADEON_ENC_END(); } static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) { - uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; - uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; - unsigned int inst_index = 0; - unsigned int bit_index = 0; - unsigned int bits_copied = 0; - RADEON_ENC_BEGIN(enc->cmd.slice_header); - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6); - radeon_enc_code_fixed_bits(enc, 0x0, 6); - radeon_enc_code_fixed_bits(enc, 0x1, 3); - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE; - inst_index++; - - if ((enc->enc_pic.nal_unit_type >= 16) && (enc->enc_pic.nal_unit_type <= 23)) - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_ue(enc, 0x0); - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT; - inst_index++; - - instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END; - inst_index++; - - switch(enc->enc_pic.picture_type) { - case PIPE_H265_ENC_PICTURE_TYPE_I: - case PIPE_H265_ENC_PICTURE_TYPE_IDR: - radeon_enc_code_ue(enc, 0x2); - break; - case PIPE_H265_ENC_PICTURE_TYPE_P: - case PIPE_H265_ENC_PICTURE_TYPE_SKIP: - radeon_enc_code_ue(enc, 0x1); - break; - case PIPE_H265_ENC_PICTURE_TYPE_B: - radeon_enc_code_ue(enc, 0x0); - break; - default: - radeon_enc_code_ue(enc, 0x1); - } - - if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { - radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); - if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) - radeon_enc_code_fixed_bits(enc, 0x1, 1); - else { - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - } - } - - if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) || - (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)) { - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.cabac_init_flag, 1); - radeon_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand); - } - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - - instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA; - inst_index++; - - if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) && - (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)){ - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); - - radeon_enc_flush_headers(enc); - bit_index ++; - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; - num_bits[inst_index] = enc->bits_output - bits_copied; - bits_copied = enc->bits_output; - inst_index++; - } - - instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - - for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) - RADEON_ENC_CS(0x00000000); - - for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { - RADEON_ENC_CS(instruction[j]); - RADEON_ENC_CS(num_bits[j]); - } - - RADEON_ENC_END(); + uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + unsigned int inst_index = 0; + unsigned int bit_index = 0; + unsigned int bits_copied = 0; + RADEON_ENC_BEGIN(enc->cmd.slice_header); + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6); + radeon_enc_code_fixed_bits(enc, 0x0, 6); + radeon_enc_code_fixed_bits(enc, 0x1, 3); + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE; + inst_index++; + + if ((enc->enc_pic.nal_unit_type >= 16) && (enc->enc_pic.nal_unit_type <= 23)) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_ue(enc, 0x0); + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END; + inst_index++; + + switch (enc->enc_pic.picture_type) { + case PIPE_H265_ENC_PICTURE_TYPE_I: + case PIPE_H265_ENC_PICTURE_TYPE_IDR: + radeon_enc_code_ue(enc, 0x2); + break; + case PIPE_H265_ENC_PICTURE_TYPE_P: + case PIPE_H265_ENC_PICTURE_TYPE_SKIP: + radeon_enc_code_ue(enc, 0x1); + break; + case PIPE_H265_ENC_PICTURE_TYPE_B: + radeon_enc_code_ue(enc, 0x0); + break; + default: + radeon_enc_code_ue(enc, 0x1); + } + + if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) + radeon_enc_code_fixed_bits(enc, 0x1, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + } + } + + if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) || + (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)) { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.cabac_init_flag, 1); + radeon_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand); + } + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA; + inst_index++; + + if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) && + (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) { + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, + 1); + + radeon_enc_flush_headers(enc); + bit_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + } + + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; + + for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++) + RADEON_ENC_CS(0x00000000); + + for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { + RADEON_ENC_CS(instruction[j]); + RADEON_ENC_CS(num_bits[j]); + } + + RADEON_ENC_END(); } static void radeon_enc_ctx(struct radeon_encoder *enc) { - enc->enc_pic.ctx_buf.swizzle_mode = 0; - enc->enc_pic.ctx_buf.rec_luma_pitch = align(enc->base.width, enc->alignment); - enc->enc_pic.ctx_buf.rec_chroma_pitch = align(enc->base.width, enc->alignment); - enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2; - - RADEON_ENC_BEGIN(enc->cmd.ctx); - RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures); - /* reconstructed_picture_1_luma_offset */ - RADEON_ENC_CS(0x00000000); - /* reconstructed_picture_1_chroma_offset */ - RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16)); - /* reconstructed_picture_2_luma_offset */ - RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16) * 3 / 2); - /* reconstructed_picture_2_chroma_offset */ - RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16) * 5 / 2); - - for (int i = 0; i < 136 ; i++) - RADEON_ENC_CS(0x00000000); - - RADEON_ENC_END(); + enc->enc_pic.ctx_buf.swizzle_mode = 0; + enc->enc_pic.ctx_buf.rec_luma_pitch = align(enc->base.width, enc->alignment); + enc->enc_pic.ctx_buf.rec_chroma_pitch = align(enc->base.width, enc->alignment); + enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2; + + RADEON_ENC_BEGIN(enc->cmd.ctx); + RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures); + /* reconstructed_picture_1_luma_offset */ + RADEON_ENC_CS(0x00000000); + /* reconstructed_picture_1_chroma_offset */ + RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16)); + /* reconstructed_picture_2_luma_offset */ + RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16) * 3 / 2); + /* reconstructed_picture_2_chroma_offset */ + RADEON_ENC_CS(align(enc->base.width, enc->alignment) * align(enc->base.height, 16) * 5 / 2); + + for (int i = 0; i < 136; i++) + RADEON_ENC_CS(0x00000000); + + RADEON_ENC_END(); } static void radeon_enc_bitstream(struct radeon_encoder *enc) { - enc->enc_pic.bit_buf.mode = RENCODE_REC_SWIZZLE_MODE_LINEAR; - enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size; - enc->enc_pic.bit_buf.video_bitstream_data_offset = 0; - - RADEON_ENC_BEGIN(enc->cmd.bitstream); - RADEON_ENC_CS(enc->enc_pic.bit_buf.mode); - RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0); - RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size); - RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset); - RADEON_ENC_END(); + enc->enc_pic.bit_buf.mode = RENCODE_REC_SWIZZLE_MODE_LINEAR; + enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size; + enc->enc_pic.bit_buf.video_bitstream_data_offset = 0; + + RADEON_ENC_BEGIN(enc->cmd.bitstream); + RADEON_ENC_CS(enc->enc_pic.bit_buf.mode); + RADEON_ENC_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0); + RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_buffer_size); + RADEON_ENC_CS(enc->enc_pic.bit_buf.video_bitstream_data_offset); + RADEON_ENC_END(); } static void radeon_enc_feedback(struct radeon_encoder *enc) { - enc->enc_pic.fb_buf.mode = RENCODE_FEEDBACK_BUFFER_MODE_LINEAR; - enc->enc_pic.fb_buf.feedback_buffer_size = 16; - enc->enc_pic.fb_buf.feedback_data_size = 40; - - RADEON_ENC_BEGIN(enc->cmd.feedback); - RADEON_ENC_CS(enc->enc_pic.fb_buf.mode); - RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); - RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size); - RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size); - RADEON_ENC_END(); + enc->enc_pic.fb_buf.mode = RENCODE_FEEDBACK_BUFFER_MODE_LINEAR; + enc->enc_pic.fb_buf.feedback_buffer_size = 16; + enc->enc_pic.fb_buf.feedback_data_size = 40; + + RADEON_ENC_BEGIN(enc->cmd.feedback); + RADEON_ENC_CS(enc->enc_pic.fb_buf.mode); + RADEON_ENC_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); + RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_buffer_size); + RADEON_ENC_CS(enc->enc_pic.fb_buf.feedback_data_size); + RADEON_ENC_END(); } static void radeon_enc_intra_refresh(struct radeon_encoder *enc) { - enc->enc_pic.intra_ref.intra_refresh_mode = RENCODE_INTRA_REFRESH_MODE_NONE; - enc->enc_pic.intra_ref.offset = 0; - enc->enc_pic.intra_ref.region_size = 0; - - RADEON_ENC_BEGIN(enc->cmd.intra_refresh); - RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode); - RADEON_ENC_CS(enc->enc_pic.intra_ref.offset); - RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size); - RADEON_ENC_END(); + enc->enc_pic.intra_ref.intra_refresh_mode = RENCODE_INTRA_REFRESH_MODE_NONE; + enc->enc_pic.intra_ref.offset = 0; + enc->enc_pic.intra_ref.region_size = 0; + + RADEON_ENC_BEGIN(enc->cmd.intra_refresh); + RADEON_ENC_CS(enc->enc_pic.intra_ref.intra_refresh_mode); + RADEON_ENC_CS(enc->enc_pic.intra_ref.offset); + RADEON_ENC_CS(enc->enc_pic.intra_ref.region_size); + RADEON_ENC_END(); } static void radeon_enc_rc_per_pic(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.rc_per_pic); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.rc_per_pic); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd); + RADEON_ENC_END(); } static void radeon_enc_encode_params(struct radeon_encoder *enc) { - switch(enc->enc_pic.picture_type) { - case PIPE_H264_ENC_PICTURE_TYPE_I: - case PIPE_H264_ENC_PICTURE_TYPE_IDR: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; - break; - case PIPE_H264_ENC_PICTURE_TYPE_P: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P; - break; - case PIPE_H264_ENC_PICTURE_TYPE_SKIP: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P_SKIP; - break; - case PIPE_H264_ENC_PICTURE_TYPE_B: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_B; - break; - default: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; - } - - enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; - enc->enc_pic.enc_params.input_pic_luma_pitch = enc->luma->u.gfx9.surf_pitch; - enc->enc_pic.enc_params.input_pic_chroma_pitch = enc->chroma->u.gfx9.surf_pitch; - enc->enc_pic.enc_params.input_pic_swizzle_mode = RENCODE_INPUT_SWIZZLE_MODE_LINEAR; - - if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; - else - enc->enc_pic.enc_params.reference_picture_index = (enc->enc_pic.frame_num - 1) % 2; - - enc->enc_pic.enc_params.reconstructed_picture_index = enc->enc_pic.frame_num % 2; - - RADEON_ENC_BEGIN(enc->cmd.enc_params); - RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); - RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.gfx9.surf_offset); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.gfx9.surf_offset); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode); - RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index); - RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index); - RADEON_ENC_END(); + switch (enc->enc_pic.picture_type) { + case PIPE_H264_ENC_PICTURE_TYPE_I: + case PIPE_H264_ENC_PICTURE_TYPE_IDR: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; + break; + case PIPE_H264_ENC_PICTURE_TYPE_P: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P; + break; + case PIPE_H264_ENC_PICTURE_TYPE_SKIP: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P_SKIP; + break; + case PIPE_H264_ENC_PICTURE_TYPE_B: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_B; + break; + default: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; + } + + enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; + enc->enc_pic.enc_params.input_pic_luma_pitch = enc->luma->u.gfx9.surf_pitch; + enc->enc_pic.enc_params.input_pic_chroma_pitch = enc->chroma->u.gfx9.surf_pitch; + enc->enc_pic.enc_params.input_pic_swizzle_mode = RENCODE_INPUT_SWIZZLE_MODE_LINEAR; + + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; + else + enc->enc_pic.enc_params.reference_picture_index = (enc->enc_pic.frame_num - 1) % 2; + + enc->enc_pic.enc_params.reconstructed_picture_index = enc->enc_pic.frame_num % 2; + + RADEON_ENC_BEGIN(enc->cmd.enc_params); + RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); + RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.gfx9.surf_offset); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.gfx9.surf_offset); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode); + RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index); + RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index); + RADEON_ENC_END(); } static void radeon_enc_encode_params_hevc(struct radeon_encoder *enc) { - switch(enc->enc_pic.picture_type) { - case PIPE_H265_ENC_PICTURE_TYPE_I: - case PIPE_H265_ENC_PICTURE_TYPE_IDR: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; - break; - case PIPE_H265_ENC_PICTURE_TYPE_P: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P; - break; - case PIPE_H265_ENC_PICTURE_TYPE_SKIP: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P_SKIP; - break; - case PIPE_H265_ENC_PICTURE_TYPE_B: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_B; - break; - default: - enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; - } - - enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; - enc->enc_pic.enc_params.input_pic_luma_pitch = enc->luma->u.gfx9.surf_pitch; - enc->enc_pic.enc_params.input_pic_chroma_pitch = enc->chroma->u.gfx9.surf_pitch; - enc->enc_pic.enc_params.input_pic_swizzle_mode = RENCODE_INPUT_SWIZZLE_MODE_LINEAR; - - if(enc->enc_pic.enc_params.pic_type == RENCODE_PICTURE_TYPE_I) - enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; - else - enc->enc_pic.enc_params.reference_picture_index = (enc->enc_pic.frame_num - 1) % 2; - - enc->enc_pic.enc_params.reconstructed_picture_index = enc->enc_pic.frame_num % 2; - - RADEON_ENC_BEGIN(enc->cmd.enc_params); - RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); - RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.gfx9.surf_offset); - RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.gfx9.surf_offset); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch); - RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode); - RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index); - RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index); - RADEON_ENC_END(); + switch (enc->enc_pic.picture_type) { + case PIPE_H265_ENC_PICTURE_TYPE_I: + case PIPE_H265_ENC_PICTURE_TYPE_IDR: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; + break; + case PIPE_H265_ENC_PICTURE_TYPE_P: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P; + break; + case PIPE_H265_ENC_PICTURE_TYPE_SKIP: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_P_SKIP; + break; + case PIPE_H265_ENC_PICTURE_TYPE_B: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_B; + break; + default: + enc->enc_pic.enc_params.pic_type = RENCODE_PICTURE_TYPE_I; + } + + enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; + enc->enc_pic.enc_params.input_pic_luma_pitch = enc->luma->u.gfx9.surf_pitch; + enc->enc_pic.enc_params.input_pic_chroma_pitch = enc->chroma->u.gfx9.surf_pitch; + enc->enc_pic.enc_params.input_pic_swizzle_mode = RENCODE_INPUT_SWIZZLE_MODE_LINEAR; + + if (enc->enc_pic.enc_params.pic_type == RENCODE_PICTURE_TYPE_I) + enc->enc_pic.enc_params.reference_picture_index = 0xFFFFFFFF; + else + enc->enc_pic.enc_params.reference_picture_index = (enc->enc_pic.frame_num - 1) % 2; + + enc->enc_pic.enc_params.reconstructed_picture_index = enc->enc_pic.frame_num % 2; + + RADEON_ENC_BEGIN(enc->cmd.enc_params); + RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); + RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.gfx9.surf_offset); + RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->chroma->u.gfx9.surf_offset); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_luma_pitch); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_chroma_pitch); + RADEON_ENC_CS(enc->enc_pic.enc_params.input_pic_swizzle_mode); + RADEON_ENC_CS(enc->enc_pic.enc_params.reference_picture_index); + RADEON_ENC_CS(enc->enc_pic.enc_params.reconstructed_picture_index); + RADEON_ENC_END(); } static void radeon_enc_encode_params_h264(struct radeon_encoder *enc) { - enc->enc_pic.h264_enc_params.input_picture_structure = RENCODE_H264_PICTURE_STRUCTURE_FRAME; - enc->enc_pic.h264_enc_params.interlaced_mode = RENCODE_H264_INTERLACING_MODE_PROGRESSIVE; - enc->enc_pic.h264_enc_params.reference_picture_structure = RENCODE_H264_PICTURE_STRUCTURE_FRAME; - enc->enc_pic.h264_enc_params.reference_picture1_index = 0xFFFFFFFF; - - RADEON_ENC_BEGIN(enc->cmd.enc_params_h264); - RADEON_ENC_CS(enc->enc_pic.h264_enc_params.input_picture_structure); - RADEON_ENC_CS(enc->enc_pic.h264_enc_params.interlaced_mode); - RADEON_ENC_CS(enc->enc_pic.h264_enc_params.reference_picture_structure); - RADEON_ENC_CS(enc->enc_pic.h264_enc_params.reference_picture1_index); - RADEON_ENC_END(); + enc->enc_pic.h264_enc_params.input_picture_structure = RENCODE_H264_PICTURE_STRUCTURE_FRAME; + enc->enc_pic.h264_enc_params.interlaced_mode = RENCODE_H264_INTERLACING_MODE_PROGRESSIVE; + enc->enc_pic.h264_enc_params.reference_picture_structure = RENCODE_H264_PICTURE_STRUCTURE_FRAME; + enc->enc_pic.h264_enc_params.reference_picture1_index = 0xFFFFFFFF; + + RADEON_ENC_BEGIN(enc->cmd.enc_params_h264); + RADEON_ENC_CS(enc->enc_pic.h264_enc_params.input_picture_structure); + RADEON_ENC_CS(enc->enc_pic.h264_enc_params.interlaced_mode); + RADEON_ENC_CS(enc->enc_pic.h264_enc_params.reference_picture_structure); + RADEON_ENC_CS(enc->enc_pic.h264_enc_params.reference_picture1_index); + RADEON_ENC_END(); } static void radeon_enc_op_init(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_INITIALIZE); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_INITIALIZE); + RADEON_ENC_END(); } static void radeon_enc_op_close(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_CLOSE_SESSION); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_CLOSE_SESSION); + RADEON_ENC_END(); } static void radeon_enc_op_enc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_ENCODE); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_ENCODE); + RADEON_ENC_END(); } static void radeon_enc_op_init_rc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_INIT_RC); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_INIT_RC); + RADEON_ENC_END(); } static void radeon_enc_op_init_rc_vbv(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL); + RADEON_ENC_END(); } static void radeon_enc_op_speed(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(RENCODE_IB_OP_SET_SPEED_ENCODING_MODE); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(RENCODE_IB_OP_SET_SPEED_ENCODING_MODE); + RADEON_ENC_END(); } static void begin(struct radeon_encoder *enc) { - enc->session_info(enc); - enc->total_task_size = 0; - enc->task_info(enc, enc->need_feedback); - enc->op_init(enc); - - enc->session_init(enc); - enc->slice_control(enc); - enc->spec_misc(enc); - enc->deblocking_filter(enc); - - enc->layer_control(enc); - enc->rc_session_init(enc); - enc->quality_params(enc); - enc->layer_select(enc); - enc->rc_layer_init(enc); - enc->layer_select(enc); - enc->rc_per_pic(enc); - enc->op_init_rc(enc); - enc->op_init_rc_vbv(enc); - *enc->p_task_size = (enc->total_task_size); + enc->session_info(enc); + enc->total_task_size = 0; + enc->task_info(enc, enc->need_feedback); + enc->op_init(enc); + + enc->session_init(enc); + enc->slice_control(enc); + enc->spec_misc(enc); + enc->deblocking_filter(enc); + + enc->layer_control(enc); + enc->rc_session_init(enc); + enc->quality_params(enc); + enc->layer_select(enc); + enc->rc_layer_init(enc); + enc->layer_select(enc); + enc->rc_per_pic(enc); + enc->op_init_rc(enc); + enc->op_init_rc_vbv(enc); + *enc->p_task_size = (enc->total_task_size); } static void radeon_enc_headers_h264(struct radeon_encoder *enc) { - if (enc->enc_pic.is_idr) { - enc->nalu_sps(enc); - enc->nalu_pps(enc); - } - enc->slice_header(enc); - enc->encode_params(enc); - enc->encode_params_codec_spec(enc); + if (enc->enc_pic.is_idr) { + enc->nalu_sps(enc); + enc->nalu_pps(enc); + } + enc->slice_header(enc); + enc->encode_params(enc); + enc->encode_params_codec_spec(enc); } static void radeon_enc_headers_hevc(struct radeon_encoder *enc) { - enc->nalu_aud(enc); - if (enc->enc_pic.is_idr) { - enc->nalu_vps(enc); - enc->nalu_pps(enc); - enc->nalu_sps(enc); - } - enc->slice_header(enc); - enc->encode_params(enc); + enc->nalu_aud(enc); + if (enc->enc_pic.is_idr) { + enc->nalu_vps(enc); + enc->nalu_pps(enc); + enc->nalu_sps(enc); + } + enc->slice_header(enc); + enc->encode_params(enc); } static void encode(struct radeon_encoder *enc) { - enc->session_info(enc); - enc->total_task_size = 0; - enc->task_info(enc, enc->need_feedback); - - enc->encode_headers(enc); - enc->ctx(enc); - enc->bitstream(enc); - enc->feedback(enc); - enc->intra_refresh(enc); - - enc->op_speed(enc); - enc->op_enc(enc); - *enc->p_task_size = (enc->total_task_size); + enc->session_info(enc); + enc->total_task_size = 0; + enc->task_info(enc, enc->need_feedback); + + enc->encode_headers(enc); + enc->ctx(enc); + enc->bitstream(enc); + enc->feedback(enc); + enc->intra_refresh(enc); + + enc->op_speed(enc); + enc->op_enc(enc); + *enc->p_task_size = (enc->total_task_size); } static void destroy(struct radeon_encoder *enc) { - enc->session_info(enc); - enc->total_task_size = 0; - enc->task_info(enc, enc->need_feedback); - enc->op_close(enc); - *enc->p_task_size = (enc->total_task_size); + enc->session_info(enc); + enc->total_task_size = 0; + enc->task_info(enc, enc->need_feedback); + enc->op_close(enc); + *enc->p_task_size = (enc->total_task_size); } void radeon_enc_1_2_init(struct radeon_encoder *enc) { - enc->begin = begin; - enc->encode = encode; - enc->destroy = destroy; - enc->session_info = radeon_enc_session_info; - enc->task_info = radeon_enc_task_info; - enc->layer_control = radeon_enc_layer_control; - enc->layer_select = radeon_enc_layer_select; - enc->rc_session_init = radeon_enc_rc_session_init; - enc->rc_layer_init = radeon_enc_rc_layer_init; - enc->quality_params = radeon_enc_quality_params; - enc->ctx = radeon_enc_ctx; - enc->bitstream = radeon_enc_bitstream; - enc->feedback = radeon_enc_feedback; - enc->intra_refresh = radeon_enc_intra_refresh; - enc->rc_per_pic = radeon_enc_rc_per_pic; - enc->encode_params = radeon_enc_encode_params; - enc->op_init = radeon_enc_op_init; - enc->op_close = radeon_enc_op_close; - enc->op_enc = radeon_enc_op_enc; - enc->op_init_rc = radeon_enc_op_init_rc; - enc->op_init_rc_vbv = radeon_enc_op_init_rc_vbv; - enc->op_speed = radeon_enc_op_speed; - - if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { - enc->session_init = radeon_enc_session_init; - enc->slice_control = radeon_enc_slice_control; - enc->spec_misc = radeon_enc_spec_misc; - enc->deblocking_filter = radeon_enc_deblocking_filter_h264; - enc->nalu_sps = radeon_enc_nalu_sps; - enc->nalu_pps = radeon_enc_nalu_pps; - enc->slice_header = radeon_enc_slice_header; - enc->encode_params = radeon_enc_encode_params; - enc->encode_params_codec_spec = radeon_enc_encode_params_h264; - enc->encode_headers = radeon_enc_headers_h264; - } else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) { - enc->session_init = radeon_enc_session_init_hevc; - enc->slice_control = radeon_enc_slice_control_hevc; - enc->spec_misc = radeon_enc_spec_misc_hevc; - enc->deblocking_filter = radeon_enc_deblocking_filter_hevc; - enc->nalu_sps = radeon_enc_nalu_sps_hevc; - enc->nalu_pps = radeon_enc_nalu_pps_hevc; - enc->nalu_vps = radeon_enc_nalu_vps; - enc->nalu_aud = radeon_enc_nalu_aud_hevc; - enc->slice_header = radeon_enc_slice_header_hevc; - enc->encode_params = radeon_enc_encode_params_hevc; - enc->encode_headers = radeon_enc_headers_hevc; - } - - enc->cmd.session_info = RENCODE_IB_PARAM_SESSION_INFO; - enc->cmd.task_info = RENCODE_IB_PARAM_TASK_INFO; - enc->cmd.session_init = RENCODE_IB_PARAM_SESSION_INIT; - enc->cmd.layer_control = RENCODE_IB_PARAM_LAYER_CONTROL; - enc->cmd.layer_select = RENCODE_IB_PARAM_LAYER_SELECT; - enc->cmd.rc_session_init = RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT; - enc->cmd.rc_layer_init = RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT; - enc->cmd.rc_per_pic = RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE; - enc->cmd.quality_params = RENCODE_IB_PARAM_QUALITY_PARAMS; - enc->cmd.nalu = RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU; - enc->cmd.slice_header = RENCODE_IB_PARAM_SLICE_HEADER; - enc->cmd.enc_params = RENCODE_IB_PARAM_ENCODE_PARAMS; - enc->cmd.intra_refresh = RENCODE_IB_PARAM_INTRA_REFRESH; - enc->cmd.ctx = RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER; - enc->cmd.bitstream = RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER; - enc->cmd.feedback = RENCODE_IB_PARAM_FEEDBACK_BUFFER; - enc->cmd.slice_control_hevc = RENCODE_HEVC_IB_PARAM_SLICE_CONTROL; - enc->cmd.spec_misc_hevc = RENCODE_HEVC_IB_PARAM_SPEC_MISC; - enc->cmd.deblocking_filter_hevc = RENCODE_HEVC_IB_PARAM_DEBLOCKING_FILTER; - enc->cmd.slice_control_h264 = RENCODE_H264_IB_PARAM_SLICE_CONTROL; - enc->cmd.spec_misc_h264 = RENCODE_H264_IB_PARAM_SPEC_MISC; - enc->cmd.enc_params_h264 = RENCODE_H264_IB_PARAM_ENCODE_PARAMS; - enc->cmd.deblocking_filter_h264 = RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER; - - enc->enc_pic.session_info.interface_version = - ((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) | - (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT)); + enc->begin = begin; + enc->encode = encode; + enc->destroy = destroy; + enc->session_info = radeon_enc_session_info; + enc->task_info = radeon_enc_task_info; + enc->layer_control = radeon_enc_layer_control; + enc->layer_select = radeon_enc_layer_select; + enc->rc_session_init = radeon_enc_rc_session_init; + enc->rc_layer_init = radeon_enc_rc_layer_init; + enc->quality_params = radeon_enc_quality_params; + enc->ctx = radeon_enc_ctx; + enc->bitstream = radeon_enc_bitstream; + enc->feedback = radeon_enc_feedback; + enc->intra_refresh = radeon_enc_intra_refresh; + enc->rc_per_pic = radeon_enc_rc_per_pic; + enc->encode_params = radeon_enc_encode_params; + enc->op_init = radeon_enc_op_init; + enc->op_close = radeon_enc_op_close; + enc->op_enc = radeon_enc_op_enc; + enc->op_init_rc = radeon_enc_op_init_rc; + enc->op_init_rc_vbv = radeon_enc_op_init_rc_vbv; + enc->op_speed = radeon_enc_op_speed; + + if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + enc->session_init = radeon_enc_session_init; + enc->slice_control = radeon_enc_slice_control; + enc->spec_misc = radeon_enc_spec_misc; + enc->deblocking_filter = radeon_enc_deblocking_filter_h264; + enc->nalu_sps = radeon_enc_nalu_sps; + enc->nalu_pps = radeon_enc_nalu_pps; + enc->slice_header = radeon_enc_slice_header; + enc->encode_params = radeon_enc_encode_params; + enc->encode_params_codec_spec = radeon_enc_encode_params_h264; + enc->encode_headers = radeon_enc_headers_h264; + } else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) { + enc->session_init = radeon_enc_session_init_hevc; + enc->slice_control = radeon_enc_slice_control_hevc; + enc->spec_misc = radeon_enc_spec_misc_hevc; + enc->deblocking_filter = radeon_enc_deblocking_filter_hevc; + enc->nalu_sps = radeon_enc_nalu_sps_hevc; + enc->nalu_pps = radeon_enc_nalu_pps_hevc; + enc->nalu_vps = radeon_enc_nalu_vps; + enc->nalu_aud = radeon_enc_nalu_aud_hevc; + enc->slice_header = radeon_enc_slice_header_hevc; + enc->encode_params = radeon_enc_encode_params_hevc; + enc->encode_headers = radeon_enc_headers_hevc; + } + + enc->cmd.session_info = RENCODE_IB_PARAM_SESSION_INFO; + enc->cmd.task_info = RENCODE_IB_PARAM_TASK_INFO; + enc->cmd.session_init = RENCODE_IB_PARAM_SESSION_INIT; + enc->cmd.layer_control = RENCODE_IB_PARAM_LAYER_CONTROL; + enc->cmd.layer_select = RENCODE_IB_PARAM_LAYER_SELECT; + enc->cmd.rc_session_init = RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT; + enc->cmd.rc_layer_init = RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT; + enc->cmd.rc_per_pic = RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE; + enc->cmd.quality_params = RENCODE_IB_PARAM_QUALITY_PARAMS; + enc->cmd.nalu = RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU; + enc->cmd.slice_header = RENCODE_IB_PARAM_SLICE_HEADER; + enc->cmd.enc_params = RENCODE_IB_PARAM_ENCODE_PARAMS; + enc->cmd.intra_refresh = RENCODE_IB_PARAM_INTRA_REFRESH; + enc->cmd.ctx = RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER; + enc->cmd.bitstream = RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER; + enc->cmd.feedback = RENCODE_IB_PARAM_FEEDBACK_BUFFER; + enc->cmd.slice_control_hevc = RENCODE_HEVC_IB_PARAM_SLICE_CONTROL; + enc->cmd.spec_misc_hevc = RENCODE_HEVC_IB_PARAM_SPEC_MISC; + enc->cmd.deblocking_filter_hevc = RENCODE_HEVC_IB_PARAM_DEBLOCKING_FILTER; + enc->cmd.slice_control_h264 = RENCODE_H264_IB_PARAM_SLICE_CONTROL; + enc->cmd.spec_misc_h264 = RENCODE_H264_IB_PARAM_SPEC_MISC; + enc->cmd.enc_params_h264 = RENCODE_H264_IB_PARAM_ENCODE_PARAMS; + enc->cmd.deblocking_filter_h264 = RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER; + + enc->enc_pic.session_info.interface_version = + ((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) | + (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT)); } diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c index e83c06275e1..545bb902277 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c @@ -25,369 +25,364 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" - -#include "si_pipe.h" -#include "radeon_video.h" #include "radeon_vcn_enc.h" +#include "radeon_video.h" +#include "si_pipe.h" +#include "util/u_video.h" -#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1 -#define RENCODE_FW_INTERFACE_MINOR_VERSION 1 - -#define RENCODE_IB_PARAM_SESSION_INFO 0x00000001 -#define RENCODE_IB_PARAM_TASK_INFO 0x00000002 -#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_LAYER_CONTROL 0x00000004 -#define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 -#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 -#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 -#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 -#define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 -#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x0000000a -#define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000b -#define RENCODE_IB_PARAM_INPUT_FORMAT 0x0000000c -#define RENCODE_IB_PARAM_OUTPUT_FORMAT 0x0000000d -#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x0000000f -#define RENCODE_IB_PARAM_INTRA_REFRESH 0x00000010 -#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000011 -#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000012 -#define RENCODE_IB_PARAM_FEEDBACK_BUFFER 0x00000015 - -#define RENCODE_HEVC_IB_PARAM_SLICE_CONTROL 0x00100001 -#define RENCODE_HEVC_IB_PARAM_SPEC_MISC 0x00100002 -#define RENCODE_HEVC_IB_PARAM_LOOP_FILTER 0x00100003 - -#define RENCODE_H264_IB_PARAM_SLICE_CONTROL 0x00200001 -#define RENCODE_H264_IB_PARAM_SPEC_MISC 0x00200002 -#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS 0x00200003 -#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER 0x00200004 - -#define RENCODE_COLOR_VOLUME_G22_BT709 0 -#define RENCODE_COLOR_VOLUME_G10_BT2020 3 - -#define RENCODE_COLOR_BIT_DEPTH_8_BIT 0 -#define RENCODE_COLOR_BIT_DEPTH_10_BIT 1 - -#define RENCODE_COLOR_PACKING_FORMAT_NV12 0 -#define RENCODE_COLOR_PACKING_FORMAT_P010 1 +#include <stdio.h> +#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1 +#define RENCODE_FW_INTERFACE_MINOR_VERSION 1 + +#define RENCODE_IB_PARAM_SESSION_INFO 0x00000001 +#define RENCODE_IB_PARAM_TASK_INFO 0x00000002 +#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 +#define RENCODE_IB_PARAM_LAYER_CONTROL 0x00000004 +#define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 +#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 +#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 +#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 +#define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 +#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x0000000a +#define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000b +#define RENCODE_IB_PARAM_INPUT_FORMAT 0x0000000c +#define RENCODE_IB_PARAM_OUTPUT_FORMAT 0x0000000d +#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x0000000f +#define RENCODE_IB_PARAM_INTRA_REFRESH 0x00000010 +#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x00000011 +#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x00000012 +#define RENCODE_IB_PARAM_FEEDBACK_BUFFER 0x00000015 + +#define RENCODE_HEVC_IB_PARAM_SLICE_CONTROL 0x00100001 +#define RENCODE_HEVC_IB_PARAM_SPEC_MISC 0x00100002 +#define RENCODE_HEVC_IB_PARAM_LOOP_FILTER 0x00100003 + +#define RENCODE_H264_IB_PARAM_SLICE_CONTROL 0x00200001 +#define RENCODE_H264_IB_PARAM_SPEC_MISC 0x00200002 +#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS 0x00200003 +#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER 0x00200004 + +#define RENCODE_COLOR_VOLUME_G22_BT709 0 +#define RENCODE_COLOR_VOLUME_G10_BT2020 3 + +#define RENCODE_COLOR_BIT_DEPTH_8_BIT 0 +#define RENCODE_COLOR_BIT_DEPTH_10_BIT 1 + +#define RENCODE_COLOR_PACKING_FORMAT_NV12 0 +#define RENCODE_COLOR_PACKING_FORMAT_P010 1 static void radeon_enc_quality_params(struct radeon_encoder *enc) { - enc->enc_pic.quality_params.vbaq_mode = 0; - enc->enc_pic.quality_params.scene_change_sensitivity = 0; - enc->enc_pic.quality_params.scene_change_min_idr_interval = 0; - enc->enc_pic.quality_params.two_pass_search_center_map_mode = 0; - - RADEON_ENC_BEGIN(enc->cmd.quality_params); - RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode); - RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity); - RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval); - RADEON_ENC_CS(enc->enc_pic.quality_params.two_pass_search_center_map_mode); - RADEON_ENC_END(); + enc->enc_pic.quality_params.vbaq_mode = 0; + enc->enc_pic.quality_params.scene_change_sensitivity = 0; + enc->enc_pic.quality_params.scene_change_min_idr_interval = 0; + enc->enc_pic.quality_params.two_pass_search_center_map_mode = 0; + + RADEON_ENC_BEGIN(enc->cmd.quality_params); + RADEON_ENC_CS(enc->enc_pic.quality_params.vbaq_mode); + RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_sensitivity); + RADEON_ENC_CS(enc->enc_pic.quality_params.scene_change_min_idr_interval); + RADEON_ENC_CS(enc->enc_pic.quality_params.two_pass_search_center_map_mode); + RADEON_ENC_END(); } static void radeon_enc_loop_filter_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_hevc); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset); - RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset); - RADEON_ENC_CS(1); - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.deblocking_filter_hevc); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.deblocking_filter_disabled); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.beta_offset_div2); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset); + RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset); + RADEON_ENC_CS(1); + RADEON_ENC_END(); } static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - int i; - - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x4201, 16); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_fixed_bits(enc, 0x0, 4); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); - radeon_enc_code_fixed_bits(enc, 0x60000000, 32); - radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); - radeon_enc_code_fixed_bits(enc, 0x0, 16); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); - - for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) ; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - - if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { - for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) - radeon_enc_code_fixed_bits(enc, 0x0, 2); - } - - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc); - radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_width); - radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_height); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8); - radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8); - radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); - //Only support CTBSize 64 - radeon_enc_code_ue(enc, 6 - (enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3)); - radeon_enc_code_ue(enc, enc->enc_pic.log2_min_transform_block_size_minus2); - radeon_enc_code_ue(enc, enc->enc_pic.log2_diff_max_min_transform_block_size); - radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_inter); - radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_intra); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, !enc->enc_pic.hevc_spec_misc.amp_disabled, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.sample_adaptive_offset_enabled_flag, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); - - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 1); - radeon_enc_code_ue(enc, 0); - radeon_enc_code_ue(enc, 0); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + int i; + + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x4201, 16); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_fixed_bits(enc, 0x0, 4); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5); + radeon_enc_code_fixed_bits(enc, 0x60000000, 32); + radeon_enc_code_fixed_bits(enc, 0xb0000000, 32); + radeon_enc_code_fixed_bits(enc, 0x0, 16); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8); + + for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + + if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) { + for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++) + radeon_enc_code_fixed_bits(enc, 0x0, 2); + } + + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc); + radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_width); + radeon_enc_code_ue(enc, enc->enc_pic.session_init.aligned_picture_height); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8); + radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8); + radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3); + // Only support CTBSize 64 + radeon_enc_code_ue(enc, + 6 - (enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3)); + radeon_enc_code_ue(enc, enc->enc_pic.log2_min_transform_block_size_minus2); + radeon_enc_code_ue(enc, enc->enc_pic.log2_diff_max_min_transform_block_size); + radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_inter); + radeon_enc_code_ue(enc, enc->enc_pic.max_transform_hierarchy_depth_intra); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, !enc->enc_pic.hevc_spec_misc.amp_disabled, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.sample_adaptive_offset_enabled_flag, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1); + + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 1); + radeon_enc_code_ue(enc, 0); + radeon_enc_code_ue(enc, 0); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.nalu); - RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; - radeon_enc_reset(enc); - radeon_enc_set_emulation_prevention(enc, false); - radeon_enc_code_fixed_bits(enc, 0x00000001, 32); - radeon_enc_code_fixed_bits(enc, 0x4401, 16); - radeon_enc_byte_align(enc); - radeon_enc_set_emulation_prevention(enc, true); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 4); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_ue(enc, 0x0); - radeon_enc_code_se(enc, 0x0); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - if (enc->enc_pic.rc_session_init.rate_control_method == - RENCODE_RATE_CONTROL_METHOD_NONE) - radeon_enc_code_fixed_bits(enc, 0x0, 1); - else { - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_ue(enc, 0x0); - } - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); - radeon_enc_code_fixed_bits(enc, 0x1, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.deblocking_filter_disabled, 1); - - if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); - radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2); - } - - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2); - radeon_enc_code_fixed_bits(enc, 0x0, 2); - - radeon_enc_code_fixed_bits(enc, 0x1, 1); - - radeon_enc_byte_align(enc); - radeon_enc_flush_headers(enc); - *size_in_bytes = (enc->bits_output + 7) / 8; - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.nalu); + RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS); + uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++]; + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + radeon_enc_code_fixed_bits(enc, 0x00000001, 32); + radeon_enc_code_fixed_bits(enc, 0x4401, 16); + radeon_enc_byte_align(enc); + radeon_enc_set_emulation_prevention(enc, true); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 4); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_se(enc, 0x0); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == RENCODE_RATE_CONTROL_METHOD_NONE) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + } + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.deblocking_filter_disabled, 1); + + if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) { + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2); + radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2); + } + + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2); + radeon_enc_code_fixed_bits(enc, 0x0, 2); + + radeon_enc_code_fixed_bits(enc, 0x1, 1); + + radeon_enc_byte_align(enc); + radeon_enc_flush_headers(enc); + *size_in_bytes = (enc->bits_output + 7) / 8; + RADEON_ENC_END(); } - static void radeon_enc_input_format(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.input_format); - if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G10_BT2020); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_10_BIT); - RADEON_ENC_CS(RENCODE_COLOR_PACKING_FORMAT_P010); - } else { - RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G22_BT709); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_8_BIT); - RADEON_ENC_CS(RENCODE_COLOR_PACKING_FORMAT_NV12); - } - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.input_format); + if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G10_BT2020); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_10_BIT); + RADEON_ENC_CS(RENCODE_COLOR_PACKING_FORMAT_P010); + } else { + RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G22_BT709); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_8_BIT); + RADEON_ENC_CS(RENCODE_COLOR_PACKING_FORMAT_NV12); + } + RADEON_ENC_END(); } static void radeon_enc_output_format(struct radeon_encoder *enc) { - RADEON_ENC_BEGIN(enc->cmd.output_format); - if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G10_BT2020); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_10_BIT); - } else { - RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G22_BT709); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); - RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_8_BIT); - } - RADEON_ENC_END(); + RADEON_ENC_BEGIN(enc->cmd.output_format); + if (enc->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G10_BT2020); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_10_BIT); + } else { + RADEON_ENC_CS(RENCODE_COLOR_VOLUME_G22_BT709); + RADEON_ENC_CS(0); + RADEON_ENC_CS(0); + RADEON_ENC_CS(RENCODE_COLOR_BIT_DEPTH_8_BIT); + } + RADEON_ENC_END(); } static void radeon_enc_ctx(struct radeon_encoder *enc) { - enc->enc_pic.ctx_buf.swizzle_mode = 0; - - uint32_t aligned_width = enc->enc_pic.session_init.aligned_picture_width; - uint32_t aligned_height = enc->enc_pic.session_init.aligned_picture_height; - - enc->enc_pic.ctx_buf.rec_luma_pitch = align(aligned_width, enc->alignment); - enc->enc_pic.ctx_buf.rec_chroma_pitch = align(aligned_width, enc->alignment); - - int luma_size = enc->enc_pic.ctx_buf.rec_luma_pitch * align(aligned_height, enc->alignment); - if (enc->enc_pic.bit_depth_luma_minus8 == 2) - luma_size *= 2; - int chroma_size = align(luma_size / 2, enc->alignment); - int offset = 0; - - enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2; - for (int i = 0; i < enc->enc_pic.ctx_buf.num_reconstructed_pictures; i++) { - enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset; - offset += luma_size; - enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset; - offset += chroma_size; - } - - RADEON_ENC_BEGIN(enc->cmd.ctx); - RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures); - - for (int i = 0; i < enc->enc_pic.ctx_buf.num_reconstructed_pictures; i++) { - RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset); - RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset); - } - - for (int i = 0; i < 136 ; i++) - RADEON_ENC_CS(0x00000000); - - RADEON_ENC_END(); + enc->enc_pic.ctx_buf.swizzle_mode = 0; + + uint32_t aligned_width = enc->enc_pic.session_init.aligned_picture_width; + uint32_t aligned_height = enc->enc_pic.session_init.aligned_picture_height; + + enc->enc_pic.ctx_buf.rec_luma_pitch = align(aligned_width, enc->alignment); + enc->enc_pic.ctx_buf.rec_chroma_pitch = align(aligned_width, enc->alignment); + + int luma_size = enc->enc_pic.ctx_buf.rec_luma_pitch * align(aligned_height, enc->alignment); + if (enc->enc_pic.bit_depth_luma_minus8 == 2) + luma_size *= 2; + int chroma_size = align(luma_size / 2, enc->alignment); + int offset = 0; + + enc->enc_pic.ctx_buf.num_reconstructed_pictures = 2; + for (int i = 0; i < enc->enc_pic.ctx_buf.num_reconstructed_pictures; i++) { + enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset; + offset += luma_size; + enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset; + offset += chroma_size; + } + + RADEON_ENC_BEGIN(enc->cmd.ctx); + RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures); + + for (int i = 0; i < enc->enc_pic.ctx_buf.num_reconstructed_pictures; i++) { + RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset); + RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset); + } + + for (int i = 0; i < 136; i++) + RADEON_ENC_CS(0x00000000); + + RADEON_ENC_END(); } - static void encode(struct radeon_encoder *enc) { - enc->session_info(enc); - enc->total_task_size = 0; - enc->task_info(enc, enc->need_feedback); - - enc->encode_headers(enc); - enc->ctx(enc); - enc->bitstream(enc); - enc->feedback(enc); - enc->intra_refresh(enc); - enc->input_format(enc); - enc->output_format(enc); - - enc->op_speed(enc); - enc->op_enc(enc); - *enc->p_task_size = (enc->total_task_size); + enc->session_info(enc); + enc->total_task_size = 0; + enc->task_info(enc, enc->need_feedback); + + enc->encode_headers(enc); + enc->ctx(enc); + enc->bitstream(enc); + enc->feedback(enc); + enc->intra_refresh(enc); + enc->input_format(enc); + enc->output_format(enc); + + enc->op_speed(enc); + enc->op_enc(enc); + *enc->p_task_size = (enc->total_task_size); } void radeon_enc_2_0_init(struct radeon_encoder *enc) { - radeon_enc_1_2_init(enc); - enc->encode = encode; - enc->ctx = radeon_enc_ctx; - enc->quality_params = radeon_enc_quality_params; - enc->input_format = radeon_enc_input_format; - enc->output_format = radeon_enc_output_format; - - if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) { - enc->deblocking_filter = radeon_enc_loop_filter_hevc; - enc->nalu_sps = radeon_enc_nalu_sps_hevc; - enc->nalu_pps = radeon_enc_nalu_pps_hevc; - } - - enc->cmd.session_info = RENCODE_IB_PARAM_SESSION_INFO; - enc->cmd.task_info = RENCODE_IB_PARAM_TASK_INFO; - enc->cmd.session_init = RENCODE_IB_PARAM_SESSION_INIT; - enc->cmd.layer_control = RENCODE_IB_PARAM_LAYER_CONTROL; - enc->cmd.layer_select = RENCODE_IB_PARAM_LAYER_SELECT; - enc->cmd.rc_session_init = RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT; - enc->cmd.rc_layer_init = RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT; - enc->cmd.rc_per_pic = RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE; - enc->cmd.quality_params = RENCODE_IB_PARAM_QUALITY_PARAMS; - enc->cmd.nalu = RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU; - enc->cmd.slice_header = RENCODE_IB_PARAM_SLICE_HEADER; - enc->cmd.input_format = RENCODE_IB_PARAM_INPUT_FORMAT; - enc->cmd.output_format = RENCODE_IB_PARAM_OUTPUT_FORMAT; - enc->cmd.enc_params = RENCODE_IB_PARAM_ENCODE_PARAMS; - enc->cmd.intra_refresh = RENCODE_IB_PARAM_INTRA_REFRESH; - enc->cmd.ctx = RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER; - enc->cmd.bitstream = RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER; - enc->cmd.feedback = RENCODE_IB_PARAM_FEEDBACK_BUFFER; - enc->cmd.slice_control_hevc = RENCODE_HEVC_IB_PARAM_SLICE_CONTROL; - enc->cmd.spec_misc_hevc = RENCODE_HEVC_IB_PARAM_SPEC_MISC; - enc->cmd.deblocking_filter_hevc = RENCODE_HEVC_IB_PARAM_LOOP_FILTER; - enc->cmd.slice_control_h264 = RENCODE_H264_IB_PARAM_SLICE_CONTROL; - enc->cmd.spec_misc_h264 = RENCODE_H264_IB_PARAM_SPEC_MISC; - enc->cmd.enc_params_h264 = RENCODE_H264_IB_PARAM_ENCODE_PARAMS; - enc->cmd.deblocking_filter_h264 = RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER; - - enc->enc_pic.session_info.interface_version = - ((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) | - (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT)); + radeon_enc_1_2_init(enc); + enc->encode = encode; + enc->ctx = radeon_enc_ctx; + enc->quality_params = radeon_enc_quality_params; + enc->input_format = radeon_enc_input_format; + enc->output_format = radeon_enc_output_format; + + if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) { + enc->deblocking_filter = radeon_enc_loop_filter_hevc; + enc->nalu_sps = radeon_enc_nalu_sps_hevc; + enc->nalu_pps = radeon_enc_nalu_pps_hevc; + } + + enc->cmd.session_info = RENCODE_IB_PARAM_SESSION_INFO; + enc->cmd.task_info = RENCODE_IB_PARAM_TASK_INFO; + enc->cmd.session_init = RENCODE_IB_PARAM_SESSION_INIT; + enc->cmd.layer_control = RENCODE_IB_PARAM_LAYER_CONTROL; + enc->cmd.layer_select = RENCODE_IB_PARAM_LAYER_SELECT; + enc->cmd.rc_session_init = RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT; + enc->cmd.rc_layer_init = RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT; + enc->cmd.rc_per_pic = RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE; + enc->cmd.quality_params = RENCODE_IB_PARAM_QUALITY_PARAMS; + enc->cmd.nalu = RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU; + enc->cmd.slice_header = RENCODE_IB_PARAM_SLICE_HEADER; + enc->cmd.input_format = RENCODE_IB_PARAM_INPUT_FORMAT; + enc->cmd.output_format = RENCODE_IB_PARAM_OUTPUT_FORMAT; + enc->cmd.enc_params = RENCODE_IB_PARAM_ENCODE_PARAMS; + enc->cmd.intra_refresh = RENCODE_IB_PARAM_INTRA_REFRESH; + enc->cmd.ctx = RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER; + enc->cmd.bitstream = RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER; + enc->cmd.feedback = RENCODE_IB_PARAM_FEEDBACK_BUFFER; + enc->cmd.slice_control_hevc = RENCODE_HEVC_IB_PARAM_SLICE_CONTROL; + enc->cmd.spec_misc_hevc = RENCODE_HEVC_IB_PARAM_SPEC_MISC; + enc->cmd.deblocking_filter_hevc = RENCODE_HEVC_IB_PARAM_LOOP_FILTER; + enc->cmd.slice_control_h264 = RENCODE_H264_IB_PARAM_SLICE_CONTROL; + enc->cmd.spec_misc_h264 = RENCODE_H264_IB_PARAM_SPEC_MISC; + enc->cmd.enc_params_h264 = RENCODE_H264_IB_PARAM_ENCODE_PARAMS; + enc->cmd.deblocking_filter_h264 = RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER; + + enc->enc_pic.session_info.interface_version = + ((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) | + (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT)); } diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 0b45a5a0f36..8e2b1a3c87d 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -25,103 +25,99 @@ * **************************************************************************/ -#include <unistd.h> +#include "radeon_video.h" +#include "radeon_vce.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_defines.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <unistd.h> /* generate an stream handle */ unsigned si_vid_alloc_stream_handle() { - static unsigned counter = 0; - unsigned stream_handle = 0; - unsigned pid = getpid(); - int i; + static unsigned counter = 0; + unsigned stream_handle = 0; + unsigned pid = getpid(); + int i; - for (i = 0; i < 32; ++i) - stream_handle |= ((pid >> i) & 1) << (31 - i); + for (i = 0; i < 32; ++i) + stream_handle |= ((pid >> i) & 1) << (31 - i); - stream_handle ^= ++counter; - return stream_handle; + stream_handle ^= ++counter; + return stream_handle; } /* create a buffer in the winsys */ -bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, - unsigned size, unsigned usage) +bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, unsigned size, + unsigned usage) { - memset(buffer, 0, sizeof(*buffer)); - buffer->usage = usage; + memset(buffer, 0, sizeof(*buffer)); + buffer->usage = usage; - /* Hardware buffer placement restrictions require the kernel to be - * able to move buffers around individually, so request a - * non-sub-allocated buffer. - */ - buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, - usage, size)); + /* Hardware buffer placement restrictions require the kernel to be + * able to move buffers around individually, so request a + * non-sub-allocated buffer. + */ + buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, usage, size)); - return buffer->res != NULL; + return buffer->res != NULL; } /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer) { - si_resource_reference(&buffer->res, NULL); + si_resource_reference(&buffer->res, NULL); } /* reallocate a buffer, preserving its content */ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, - struct rvid_buffer *new_buf, unsigned new_size) + struct rvid_buffer *new_buf, unsigned new_size) { - struct si_screen *sscreen = (struct si_screen *)screen; - struct radeon_winsys* ws = sscreen->ws; - unsigned bytes = MIN2(new_buf->res->buf->size, new_size); - struct rvid_buffer old_buf = *new_buf; - void *src = NULL, *dst = NULL; - - if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) - goto error; - - src = ws->buffer_map(old_buf.res->buf, cs, - PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); - if (!src) - goto error; - - dst = ws->buffer_map(new_buf->res->buf, cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dst) - goto error; - - memcpy(dst, src, bytes); - if (new_size > bytes) { - new_size -= bytes; - dst += bytes; - memset(dst, 0, new_size); - } - ws->buffer_unmap(new_buf->res->buf); - ws->buffer_unmap(old_buf.res->buf); - si_vid_destroy_buffer(&old_buf); - return true; + struct si_screen *sscreen = (struct si_screen *)screen; + struct radeon_winsys *ws = sscreen->ws; + unsigned bytes = MIN2(new_buf->res->buf->size, new_size); + struct rvid_buffer old_buf = *new_buf; + void *src = NULL, *dst = NULL; + + if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) + goto error; + + src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); + if (!src) + goto error; + + dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dst) + goto error; + + memcpy(dst, src, bytes); + if (new_size > bytes) { + new_size -= bytes; + dst += bytes; + memset(dst, 0, new_size); + } + ws->buffer_unmap(new_buf->res->buf); + ws->buffer_unmap(old_buf.res->buf); + si_vid_destroy_buffer(&old_buf); + return true; error: - if (src) - ws->buffer_unmap(old_buf.res->buf); - si_vid_destroy_buffer(new_buf); - *new_buf = old_buf; - return false; + if (src) + ws->buffer_unmap(old_buf.res->buf); + si_vid_destroy_buffer(new_buf); + *new_buf = old_buf; + return false; } /* clear the buffer with zeros */ -void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) +void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer *buffer) { - struct si_context *sctx = (struct si_context*)context; + struct si_context *sctx = (struct si_context *)context; - si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->b.b.width0, 0); - context->flush(context, NULL, 0); + si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->b.b.width0, 0); + context->flush(context, NULL, 0); } diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h index 232b7736f7b..5ef0a213462 100644 --- a/src/gallium/drivers/radeon/radeon_video.h +++ b/src/gallium/drivers/radeon/radeon_video.h @@ -31,33 +31,32 @@ #include "radeon/radeon_winsys.h" #include "vl/vl_video_buffer.h" -#define RVID_ERR(fmt, args...) \ - fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args) +#define RVID_ERR(fmt, args...) \ + fprintf(stderr, "EE %s:%d %s UVD - " fmt, __FILE__, __LINE__, __func__, ##args) #define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) /* video buffer representation */ -struct rvid_buffer -{ - unsigned usage; - struct si_resource *res; +struct rvid_buffer { + unsigned usage; + struct si_resource *res; }; /* generate an stream handle */ unsigned si_vid_alloc_stream_handle(void); /* create a buffer in the winsys */ -bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, - unsigned size, unsigned usage); +bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, unsigned size, + unsigned usage); /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer); /* reallocate a buffer, preserving its content */ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, - struct rvid_buffer *new_buf, unsigned new_size); + struct rvid_buffer *new_buf, unsigned new_size); /* clear the buffer with zeros */ -void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer); +void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer *buffer); #endif // RADEON_VIDEO_H diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 15893bd0b28..d02370af671 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -30,65 +30,70 @@ /* Whether the next IB can start immediately and not wait for draws and * dispatches from the current IB to finish. */ -#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31) +#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31) -#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \ - (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW) - -#include "pipebuffer/pb_buffer.h" +#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \ + (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW) #include "amd/common/ac_gpu_info.h" #include "amd/common/ac_surface.h" +#include "pipebuffer/pb_buffer.h" /* Tiling flags. */ -enum radeon_bo_layout { - RADEON_LAYOUT_LINEAR = 0, - RADEON_LAYOUT_TILED, - RADEON_LAYOUT_SQUARETILED, +enum radeon_bo_layout +{ + RADEON_LAYOUT_LINEAR = 0, + RADEON_LAYOUT_TILED, + RADEON_LAYOUT_SQUARETILED, - RADEON_LAYOUT_UNKNOWN + RADEON_LAYOUT_UNKNOWN }; -enum radeon_bo_domain { /* bitfield */ - RADEON_DOMAIN_GTT = 2, - RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, - RADEON_DOMAIN_GDS = 8, - RADEON_DOMAIN_OA = 16, +enum radeon_bo_domain +{ /* bitfield */ + RADEON_DOMAIN_GTT = 2, + RADEON_DOMAIN_VRAM = 4, + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; -enum radeon_bo_flag { /* bitfield */ - RADEON_FLAG_GTT_WC = (1 << 0), - RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), - RADEON_FLAG_NO_SUBALLOC = (1 << 2), - RADEON_FLAG_SPARSE = (1 << 3), - RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), - RADEON_FLAG_READ_ONLY = (1 << 5), - RADEON_FLAG_32BIT = (1 << 6), +enum radeon_bo_flag +{ /* bitfield */ + RADEON_FLAG_GTT_WC = (1 << 0), + RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), + RADEON_FLAG_NO_SUBALLOC = (1 << 2), + RADEON_FLAG_SPARSE = (1 << 3), + RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), + RADEON_FLAG_READ_ONLY = (1 << 5), + RADEON_FLAG_32BIT = (1 << 6), }; -enum radeon_dependency_flag { - /* Add the dependency to the parallel compute IB only. */ - RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0, +enum radeon_dependency_flag +{ + /* Add the dependency to the parallel compute IB only. */ + RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0, - /* Instead of waiting for a job to finish execution, the dependency will - * be signaled when the job starts execution. - */ - RADEON_DEPENDENCY_START_FENCE = 1 << 1, + /* Instead of waiting for a job to finish execution, the dependency will + * be signaled when the job starts execution. + */ + RADEON_DEPENDENCY_START_FENCE = 1 << 1, }; -enum radeon_bo_usage { /* bitfield */ - RADEON_USAGE_READ = 2, - RADEON_USAGE_WRITE = 4, - RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, +enum radeon_bo_usage +{ /* bitfield */ + RADEON_USAGE_READ = 2, + RADEON_USAGE_WRITE = 4, + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, - /* The winsys ensures that the CS submission will be scheduled after - * previously flushed CSs referencing this BO in a conflicting way. - */ - RADEON_USAGE_SYNCHRONIZED = 8 + /* The winsys ensures that the CS submission will be scheduled after + * previously flushed CSs referencing this BO in a conflicting way. + */ + RADEON_USAGE_SYNCHRONIZED = 8 }; -enum radeon_transfer_flags { +enum radeon_transfer_flags +{ /* Indicates that the caller will unmap the buffer. * * Not unmapping buffers is an important performance optimization for @@ -99,771 +104,725 @@ enum radeon_transfer_flags { #define RADEON_SPARSE_PAGE_SIZE (64 * 1024) -enum radeon_value_id { - RADEON_REQUESTED_VRAM_MEMORY, - RADEON_REQUESTED_GTT_MEMORY, - RADEON_MAPPED_VRAM, - RADEON_MAPPED_GTT, - RADEON_BUFFER_WAIT_TIME_NS, - RADEON_NUM_MAPPED_BUFFERS, - RADEON_TIMESTAMP, - RADEON_NUM_GFX_IBS, - RADEON_NUM_SDMA_IBS, - RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */ - RADEON_GFX_IB_SIZE_COUNTER, - RADEON_NUM_BYTES_MOVED, - RADEON_NUM_EVICTIONS, - RADEON_NUM_VRAM_CPU_PAGE_FAULTS, - RADEON_VRAM_USAGE, - RADEON_VRAM_VIS_USAGE, - RADEON_GTT_USAGE, - RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ - RADEON_CURRENT_SCLK, - RADEON_CURRENT_MCLK, - RADEON_CS_THREAD_TIME, +enum radeon_value_id +{ + RADEON_REQUESTED_VRAM_MEMORY, + RADEON_REQUESTED_GTT_MEMORY, + RADEON_MAPPED_VRAM, + RADEON_MAPPED_GTT, + RADEON_BUFFER_WAIT_TIME_NS, + RADEON_NUM_MAPPED_BUFFERS, + RADEON_TIMESTAMP, + RADEON_NUM_GFX_IBS, + RADEON_NUM_SDMA_IBS, + RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */ + RADEON_GFX_IB_SIZE_COUNTER, + RADEON_NUM_BYTES_MOVED, + RADEON_NUM_EVICTIONS, + RADEON_NUM_VRAM_CPU_PAGE_FAULTS, + RADEON_VRAM_USAGE, + RADEON_VRAM_VIS_USAGE, + RADEON_GTT_USAGE, + RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ + RADEON_CURRENT_SCLK, + RADEON_CURRENT_MCLK, + RADEON_CS_THREAD_TIME, }; -enum radeon_bo_priority { - /* Each group of two has the same priority. */ - RADEON_PRIO_FENCE = 0, - RADEON_PRIO_TRACE, +enum radeon_bo_priority +{ + /* Each group of two has the same priority. */ + RADEON_PRIO_FENCE = 0, + RADEON_PRIO_TRACE, - RADEON_PRIO_SO_FILLED_SIZE = 2, - RADEON_PRIO_QUERY, + RADEON_PRIO_SO_FILLED_SIZE = 2, + RADEON_PRIO_QUERY, - RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ - RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ + RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ + RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ - RADEON_PRIO_DRAW_INDIRECT = 6, - RADEON_PRIO_INDEX_BUFFER, + RADEON_PRIO_DRAW_INDIRECT = 6, + RADEON_PRIO_INDEX_BUFFER, - RADEON_PRIO_CP_DMA = 8, - RADEON_PRIO_BORDER_COLORS, + RADEON_PRIO_CP_DMA = 8, + RADEON_PRIO_BORDER_COLORS, - RADEON_PRIO_CONST_BUFFER = 10, - RADEON_PRIO_DESCRIPTORS, + RADEON_PRIO_CONST_BUFFER = 10, + RADEON_PRIO_DESCRIPTORS, - RADEON_PRIO_SAMPLER_BUFFER = 12, - RADEON_PRIO_VERTEX_BUFFER, + RADEON_PRIO_SAMPLER_BUFFER = 12, + RADEON_PRIO_VERTEX_BUFFER, - RADEON_PRIO_SHADER_RW_BUFFER = 14, - RADEON_PRIO_COMPUTE_GLOBAL, + RADEON_PRIO_SHADER_RW_BUFFER = 14, + RADEON_PRIO_COMPUTE_GLOBAL, - RADEON_PRIO_SAMPLER_TEXTURE = 16, - RADEON_PRIO_SHADER_RW_IMAGE, + RADEON_PRIO_SAMPLER_TEXTURE = 16, + RADEON_PRIO_SHADER_RW_IMAGE, - RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, - RADEON_PRIO_COLOR_BUFFER, + RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, + RADEON_PRIO_COLOR_BUFFER, - RADEON_PRIO_DEPTH_BUFFER = 20, + RADEON_PRIO_DEPTH_BUFFER = 20, - RADEON_PRIO_COLOR_BUFFER_MSAA = 22, + RADEON_PRIO_COLOR_BUFFER_MSAA = 22, - RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, + RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, - RADEON_PRIO_SEPARATE_META = 26, - RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ + RADEON_PRIO_SEPARATE_META = 26, + RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ - RADEON_PRIO_SHADER_RINGS = 28, + RADEON_PRIO_SHADER_RINGS = 28, - RADEON_PRIO_SCRATCH_BUFFER = 30, - /* 31 is the maximum value */ + RADEON_PRIO_SCRATCH_BUFFER = 30, + /* 31 is the maximum value */ }; struct winsys_handle; struct radeon_winsys_ctx; struct radeon_cmdbuf_chunk { - unsigned cdw; /* Number of used dwords. */ - unsigned max_dw; /* Maximum number of dwords. */ - uint32_t *buf; /* The base pointer of the chunk. */ + unsigned cdw; /* Number of used dwords. */ + unsigned max_dw; /* Maximum number of dwords. */ + uint32_t *buf; /* The base pointer of the chunk. */ }; struct radeon_cmdbuf { - struct radeon_cmdbuf_chunk current; - struct radeon_cmdbuf_chunk *prev; - unsigned num_prev; /* Number of previous chunks. */ - unsigned max_prev; /* Space in array pointed to by prev. */ - unsigned prev_dw; /* Total number of dwords in previous chunks. */ - - /* Memory usage of the buffer list. These are always 0 for preamble IBs. */ - uint64_t used_vram; - uint64_t used_gart; - uint64_t gpu_address; + struct radeon_cmdbuf_chunk current; + struct radeon_cmdbuf_chunk *prev; + unsigned num_prev; /* Number of previous chunks. */ + unsigned max_prev; /* Space in array pointed to by prev. */ + unsigned prev_dw; /* Total number of dwords in previous chunks. */ + + /* Memory usage of the buffer list. These are always 0 for preamble IBs. */ + uint64_t used_vram; + uint64_t used_gart; + uint64_t gpu_address; }; /* Tiling info for display code, DRI sharing, and other data. */ struct radeon_bo_metadata { - /* Tiling flags describing the texture layout for display code - * and DRI sharing. - */ - union { - struct { - enum radeon_bo_layout microtile; - enum radeon_bo_layout macrotile; - unsigned pipe_config; - unsigned bankw; - unsigned bankh; - unsigned tile_split; - unsigned mtilea; - unsigned num_banks; - unsigned stride; - bool scanout; - } legacy; - - struct { - /* surface flags */ - unsigned swizzle_mode:5; - - /* DCC flags */ - /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ - unsigned dcc_offset_256B:24; - unsigned dcc_pitch_max:14; /* (mip chain pitch - 1) for DCN */ - unsigned dcc_independent_64B:1; - - bool scanout; - } gfx9; - } u; - - /* Additional metadata associated with the buffer, in bytes. - * The maximum size is 64 * 4. This is opaque for the winsys & kernel. - * Supported by amdgpu only. - */ - uint32_t size_metadata; - uint32_t metadata[64]; + /* Tiling flags describing the texture layout for display code + * and DRI sharing. + */ + union { + struct { + enum radeon_bo_layout microtile; + enum radeon_bo_layout macrotile; + unsigned pipe_config; + unsigned bankw; + unsigned bankh; + unsigned tile_split; + unsigned mtilea; + unsigned num_banks; + unsigned stride; + bool scanout; + } legacy; + + struct { + /* surface flags */ + unsigned swizzle_mode : 5; + + /* DCC flags */ + /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ + unsigned dcc_offset_256B : 24; + unsigned dcc_pitch_max : 14; /* (mip chain pitch - 1) for DCN */ + unsigned dcc_independent_64B : 1; + + bool scanout; + } gfx9; + } u; + + /* Additional metadata associated with the buffer, in bytes. + * The maximum size is 64 * 4. This is opaque for the winsys & kernel. + * Supported by amdgpu only. + */ + uint32_t size_metadata; + uint32_t metadata[64]; }; -enum radeon_feature_id { - RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ - RADEON_FID_R300_CMASK_ACCESS, +enum radeon_feature_id +{ + RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ + RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_bo_list_item { - uint64_t bo_size; - uint64_t vm_address; - uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ + uint64_t bo_size; + uint64_t vm_address; + uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ }; struct radeon_winsys { - /** - * The screen object this winsys was created for - */ - struct pipe_screen *screen; - - /** - * Decrement the winsys reference count. - * - * \param ws The winsys this function is called for. - * \return True if the winsys and screen should be destroyed. - */ - bool (*unref)(struct radeon_winsys *ws); - - /** - * Destroy this winsys. - * - * \param ws The winsys this function is called from. - */ - void (*destroy)(struct radeon_winsys *ws); - - /** - * Query an info structure from winsys. - * - * \param ws The winsys this function is called from. - * \param info Return structure - */ - void (*query_info)(struct radeon_winsys *ws, - struct radeon_info *info); - - /** - * A hint for the winsys that it should pin its execution threads to - * a group of cores sharing a specific L3 cache if the CPU has multiple - * L3 caches. This is needed for good multithreading performance on - * AMD Zen CPUs. - */ - void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache); - - /************************************************************************** - * Buffer management. Buffer attributes are mostly fixed over its lifetime. - * - * Remember that gallium gets to choose the interface it needs, and the - * window systems must then implement that interface (rather than the - * other way around...). - *************************************************************************/ - - /** - * Create a buffer object. - * - * \param ws The winsys this function is called from. - * \param size The size to allocate. - * \param alignment An alignment of the buffer in memory. - * \param use_reusable_pool Whether the cache buffer manager should be used. - * \param domain A bitmask of the RADEON_DOMAIN_* flags. - * \return The created buffer object. - */ - struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, - uint64_t size, - unsigned alignment, - enum radeon_bo_domain domain, - enum radeon_bo_flag flags); - - /** - * Map the entire data store of a buffer object into the client's address - * space. - * - * Callers are expected to unmap buffers again if and only if the - * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. - * - * \param buf A winsys buffer object to map. - * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. - * \return The pointer at the beginning of the buffer. - */ - void *(*buffer_map)(struct pb_buffer *buf, - struct radeon_cmdbuf *cs, - enum pipe_transfer_usage usage); - - /** - * Unmap a buffer object from the client's address space. - * - * \param buf A winsys buffer object to unmap. - */ - void (*buffer_unmap)(struct pb_buffer *buf); - - /** - * Wait for the buffer and return true if the buffer is not used - * by the device. - * - * The timeout of 0 will only return the status. - * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer - * is idle. - */ - bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout, - enum radeon_bo_usage usage); - - /** - * Return buffer metadata. - * (tiling info for display code, DRI sharing, and other data) - * - * \param buf A winsys buffer object to get the flags from. - * \param md Metadata - */ - void (*buffer_get_metadata)(struct pb_buffer *buf, - struct radeon_bo_metadata *md); - - /** - * Set buffer metadata. - * (tiling info for display code, DRI sharing, and other data) - * - * \param buf A winsys buffer object to set the flags for. - * \param md Metadata - */ - void (*buffer_set_metadata)(struct pb_buffer *buf, - struct radeon_bo_metadata *md); - - /** - * Get a winsys buffer from a winsys handle. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param ws The winsys this function is called from. - * \param whandle A winsys handle pointer as was received from a state - * tracker. - */ - struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, - struct winsys_handle *whandle, - unsigned vm_alignment); - - /** - * Get a winsys buffer from a user pointer. The resulting buffer can't - * be exported. Both pointer and size must be page aligned. - * - * \param ws The winsys this function is called from. - * \param pointer User pointer to turn into a buffer object. - * \param Size Size in bytes for the new buffer. - */ - struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, - void *pointer, uint64_t size); - - /** - * Whether the buffer was created from a user pointer. - * - * \param buf A winsys buffer object - * \return whether \p buf was created via buffer_from_ptr - */ - bool (*buffer_is_user_ptr)(struct pb_buffer *buf); - - /** Whether the buffer was suballocated. */ - bool (*buffer_is_suballocated)(struct pb_buffer *buf); - - /** - * Get a winsys handle from a winsys buffer. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param ws The winsys instance for which the handle is to be valid - * \param buf A winsys buffer object to get the handle from. - * \param whandle A winsys handle pointer. - * \return true on success. - */ - bool (*buffer_get_handle)(struct radeon_winsys *ws, - struct pb_buffer *buf, - struct winsys_handle *whandle); - - /** - * Change the commitment of a (64KB-page aligned) region of the given - * sparse buffer. - * - * \warning There is no automatic synchronization with command submission. - * - * \note Only implemented by the amdgpu winsys. - * - * \return false on out of memory or other failure, true on success. - */ - bool (*buffer_commit)(struct pb_buffer *buf, - uint64_t offset, uint64_t size, - bool commit); - - /** - * Return the virtual address of a buffer. - * - * When virtual memory is not in use, this is the offset relative to the - * relocation base (non-zero for sub-allocated buffers). - * - * \param buf A winsys buffer object - * \return virtual address - */ - uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); - - /** - * Return the offset of this buffer relative to the relocation base. - * This is only non-zero for sub-allocated buffers. - * - * This is only supported in the radeon winsys, since amdgpu uses virtual - * addresses in submissions even for the video engines. - * - * \param buf A winsys buffer object - * \return the offset for relocations - */ - unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf); - - /** - * Query the initial placement of the buffer from the kernel driver. - */ - enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf); - - /************************************************************************** - * Command submission. - * - * Each pipe context should create its own command stream and submit - * commands independently of other contexts. - *************************************************************************/ - - /** - * Create a command submission context. - * Various command streams can be submitted to the same context. - */ - struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); - - /** - * Destroy a context. - */ - void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); - - /** - * Query a GPU reset status. - */ - enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx); - - /** - * Create a command stream. - * - * \param ctx The submission context - * \param ring_type The ring type (GFX, DMA, UVD) - * \param flush Flush callback function associated with the command stream. - * \param user User pointer that will be passed to the flush callback. - */ - struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, - enum ring_type ring_type, - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence), - void *flush_ctx, - bool stop_exec_on_failure); - - /** - * Add a parallel compute IB to a gfx IB. It will share the buffer list - * and fence dependencies with the gfx IB. The gfx flush call will submit - * both IBs at the same time. - * - * The compute IB doesn't have an output fence, so the primary IB has - * to use a wait packet for synchronization. - * - * The returned IB is only a stream for writing packets to the new - * IB. Calling other winsys functions with it is not allowed, not even - * "cs_destroy". Use the gfx IB instead. - * - * \param cs Gfx IB - */ - struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, - bool uses_gds_ordered_append); - - /** - * Destroy a command stream. - * - * \param cs A command stream to destroy. - */ - void (*cs_destroy)(struct radeon_cmdbuf *cs); - - /** - * Add a buffer. Each buffer used by a CS must be added using this function. - * - * \param cs Command stream - * \param buf Buffer - * \param usage Whether the buffer is used for read and/or write. - * \param domain Bitmask of the RADEON_DOMAIN_* flags. - * \param priority A higher number means a greater chance of being - * placed in the requested domain. 15 is the maximum. - * \return Buffer index. - */ - unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf, - enum radeon_bo_usage usage, - enum radeon_bo_domain domain, + /** + * The screen object this winsys was created for + */ + struct pipe_screen *screen; + + /** + * Decrement the winsys reference count. + * + * \param ws The winsys this function is called for. + * \return True if the winsys and screen should be destroyed. + */ + bool (*unref)(struct radeon_winsys *ws); + + /** + * Destroy this winsys. + * + * \param ws The winsys this function is called from. + */ + void (*destroy)(struct radeon_winsys *ws); + + /** + * Query an info structure from winsys. + * + * \param ws The winsys this function is called from. + * \param info Return structure + */ + void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info); + + /** + * A hint for the winsys that it should pin its execution threads to + * a group of cores sharing a specific L3 cache if the CPU has multiple + * L3 caches. This is needed for good multithreading performance on + * AMD Zen CPUs. + */ + void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache); + + /************************************************************************** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + *************************************************************************/ + + /** + * Create a buffer object. + * + * \param ws The winsys this function is called from. + * \param size The size to allocate. + * \param alignment An alignment of the buffer in memory. + * \param use_reusable_pool Whether the cache buffer manager should be used. + * \param domain A bitmask of the RADEON_DOMAIN_* flags. + * \return The created buffer object. + */ + struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, + enum radeon_bo_domain domain, enum radeon_bo_flag flags); + + /** + * Map the entire data store of a buffer object into the client's address + * space. + * + * Callers are expected to unmap buffers again if and only if the + * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. + * + * \param buf A winsys buffer object to map. + * \param cs A command stream to flush if the buffer is referenced by it. + * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. + * \return The pointer at the beginning of the buffer. + */ + void *(*buffer_map)(struct pb_buffer *buf, struct radeon_cmdbuf *cs, + enum pipe_transfer_usage usage); + + /** + * Unmap a buffer object from the client's address space. + * + * \param buf A winsys buffer object to unmap. + */ + void (*buffer_unmap)(struct pb_buffer *buf); + + /** + * Wait for the buffer and return true if the buffer is not used + * by the device. + * + * The timeout of 0 will only return the status. + * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer + * is idle. + */ + bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout, enum radeon_bo_usage usage); + + /** + * Return buffer metadata. + * (tiling info for display code, DRI sharing, and other data) + * + * \param buf A winsys buffer object to get the flags from. + * \param md Metadata + */ + void (*buffer_get_metadata)(struct pb_buffer *buf, struct radeon_bo_metadata *md); + + /** + * Set buffer metadata. + * (tiling info for display code, DRI sharing, and other data) + * + * \param buf A winsys buffer object to set the flags for. + * \param md Metadata + */ + void (*buffer_set_metadata)(struct pb_buffer *buf, struct radeon_bo_metadata *md); + + /** + * Get a winsys buffer from a winsys handle. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from. + * \param whandle A winsys handle pointer as was received from a state + * tracker. + */ + struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle, + unsigned vm_alignment); + + /** + * Get a winsys buffer from a user pointer. The resulting buffer can't + * be exported. Both pointer and size must be page aligned. + * + * \param ws The winsys this function is called from. + * \param pointer User pointer to turn into a buffer object. + * \param Size Size in bytes for the new buffer. + */ + struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size); + + /** + * Whether the buffer was created from a user pointer. + * + * \param buf A winsys buffer object + * \return whether \p buf was created via buffer_from_ptr + */ + bool (*buffer_is_user_ptr)(struct pb_buffer *buf); + + /** Whether the buffer was suballocated. */ + bool (*buffer_is_suballocated)(struct pb_buffer *buf); + + /** + * Get a winsys handle from a winsys buffer. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys instance for which the handle is to be valid + * \param buf A winsys buffer object to get the handle from. + * \param whandle A winsys handle pointer. + * \return true on success. + */ + bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer *buf, + struct winsys_handle *whandle); + + /** + * Change the commitment of a (64KB-page aligned) region of the given + * sparse buffer. + * + * \warning There is no automatic synchronization with command submission. + * + * \note Only implemented by the amdgpu winsys. + * + * \return false on out of memory or other failure, true on success. + */ + bool (*buffer_commit)(struct pb_buffer *buf, uint64_t offset, uint64_t size, bool commit); + + /** + * Return the virtual address of a buffer. + * + * When virtual memory is not in use, this is the offset relative to the + * relocation base (non-zero for sub-allocated buffers). + * + * \param buf A winsys buffer object + * \return virtual address + */ + uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); + + /** + * Return the offset of this buffer relative to the relocation base. + * This is only non-zero for sub-allocated buffers. + * + * This is only supported in the radeon winsys, since amdgpu uses virtual + * addresses in submissions even for the video engines. + * + * \param buf A winsys buffer object + * \return the offset for relocations + */ + unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf); + + /** + * Query the initial placement of the buffer from the kernel driver. + */ + enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf); + + /************************************************************************** + * Command submission. + * + * Each pipe context should create its own command stream and submit + * commands independently of other contexts. + *************************************************************************/ + + /** + * Create a command submission context. + * Various command streams can be submitted to the same context. + */ + struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); + + /** + * Destroy a context. + */ + void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); + + /** + * Query a GPU reset status. + */ + enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx); + + /** + * Create a command stream. + * + * \param ctx The submission context + * \param ring_type The ring type (GFX, DMA, UVD) + * \param flush Flush callback function associated with the command stream. + * \param user User pointer that will be passed to the flush callback. + */ + struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence), + void *flush_ctx, bool stop_exec_on_failure); + + /** + * Add a parallel compute IB to a gfx IB. It will share the buffer list + * and fence dependencies with the gfx IB. The gfx flush call will submit + * both IBs at the same time. + * + * The compute IB doesn't have an output fence, so the primary IB has + * to use a wait packet for synchronization. + * + * The returned IB is only a stream for writing packets to the new + * IB. Calling other winsys functions with it is not allowed, not even + * "cs_destroy". Use the gfx IB instead. + * + * \param cs Gfx IB + */ + struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, + bool uses_gds_ordered_append); + + /** + * Destroy a command stream. + * + * \param cs A command stream to destroy. + */ + void (*cs_destroy)(struct radeon_cmdbuf *cs); + + /** + * Add a buffer. Each buffer used by a CS must be added using this function. + * + * \param cs Command stream + * \param buf Buffer + * \param usage Whether the buffer is used for read and/or write. + * \param domain Bitmask of the RADEON_DOMAIN_* flags. + * \param priority A higher number means a greater chance of being + * placed in the requested domain. 15 is the maximum. + * \return Buffer index. + */ + unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, enum radeon_bo_priority priority); - /** - * Return the index of an already-added buffer. - * - * Not supported on amdgpu. Drivers with GPUVM should not care about - * buffer indices. - * - * \param cs Command stream - * \param buf Buffer - * \return The buffer index, or -1 if the buffer has not been added. - */ - int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf); - - /** - * Return true if there is enough memory in VRAM and GTT for the buffers - * added so far. If the validation fails, all buffers which have - * been added since the last call of cs_validate will be removed and - * the CS will be flushed (provided there are still any buffers). - * - * \param cs A command stream to validate. - */ - bool (*cs_validate)(struct radeon_cmdbuf *cs); - - /** - * Check whether the given number of dwords is available in the IB. - * Optionally chain a new chunk of the IB if necessary and supported. - * - * \param cs A command stream. - * \param dw Number of CS dwords requested by the caller. - * \param force_chaining Chain the IB into a new buffer now to discard - * the CP prefetch cache (to emulate PKT3_REWIND) - * \return true if there is enough space - */ - bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw, - bool force_chaining); - - /** - * Return the buffer list. - * - * This is the buffer list as passed to the kernel, i.e. it only contains - * the parent buffers of sub-allocated buffers. - * - * \param cs Command stream - * \param list Returned buffer list. Set to NULL to query the count only. - * \return The buffer count. - */ - unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, - struct radeon_bo_list_item *list); - - /** - * Flush a command stream. - * - * \param cs A command stream to flush. - * \param flags, PIPE_FLUSH_* flags. - * \param fence Pointer to a fence. If non-NULL, a fence is inserted - * after the CS and is returned through this parameter. - * \return Negative POSIX error code or 0 for success. - * Asynchronous submissions never return an error. - */ - int (*cs_flush)(struct radeon_cmdbuf *cs, - unsigned flags, - struct pipe_fence_handle **fence); - - /** - * Create a fence before the CS is flushed. - * The user must flush manually to complete the initializaton of the fence. - * - * The fence must not be used for anything except \ref cs_add_fence_dependency - * before the flush. - */ - struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs); - - /** - * Return true if a buffer is referenced by a command stream. - * - * \param cs A command stream. - * \param buf A winsys buffer. - */ - bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf, - enum radeon_bo_usage usage); - - /** - * Request access to a feature for a command stream. - * - * \param cs A command stream. - * \param fid Feature ID, one of RADEON_FID_* - * \param enable Whether to enable or disable the feature. - */ - bool (*cs_request_feature)(struct radeon_cmdbuf *cs, - enum radeon_feature_id fid, - bool enable); - /** - * Make sure all asynchronous flush of the cs have completed - * - * \param cs A command stream. - */ - void (*cs_sync_flush)(struct radeon_cmdbuf *cs); - - /** - * Add a fence dependency to the CS, so that the CS will wait for - * the fence before execution. - * - * \param dependency_flags Bitmask of RADEON_DEPENDENCY_* - */ - void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence, - unsigned dependency_flags); - - /** - * Signal a syncobj when the CS finishes execution. - */ - void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence); - - /** - * Wait for the fence and return true if the fence has been signalled. - * The timeout of 0 will only return the status. - * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence - * is signalled. - */ - bool (*fence_wait)(struct radeon_winsys *ws, - struct pipe_fence_handle *fence, - uint64_t timeout); - - /** - * Reference counting for fences. - */ - void (*fence_reference)(struct pipe_fence_handle **dst, - struct pipe_fence_handle *src); - - /** - * Create a new fence object corresponding to the given syncobj fd. - */ - struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, - int fd); - - /** - * Create a new fence object corresponding to the given sync_file. - */ - struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, - int fd); - - /** - * Return a sync_file FD corresponding to the given fence object. - */ - int (*fence_export_sync_file)(struct radeon_winsys *ws, - struct pipe_fence_handle *fence); - - /** - * Return a sync file FD that is already signalled. - */ - int (*export_signalled_sync_file)(struct radeon_winsys *ws); - - /** - * Initialize surface - * - * \param ws The winsys this function is called from. - * \param tex Input texture description - * \param flags Bitmask of RADEON_SURF_* flags - * \param bpe Bytes per pixel, it can be different for Z buffers. - * \param mode Preferred tile mode. (linear, 1D, or 2D) - * \param surf Output structure - */ - int (*surface_init)(struct radeon_winsys *ws, - const struct pipe_resource *tex, - unsigned flags, unsigned bpe, - enum radeon_surf_mode mode, - struct radeon_surf *surf); - - uint64_t (*query_value)(struct radeon_winsys *ws, - enum radeon_value_id value); - - bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, - unsigned num_registers, uint32_t *out); + /** + * Return the index of an already-added buffer. + * + * Not supported on amdgpu. Drivers with GPUVM should not care about + * buffer indices. + * + * \param cs Command stream + * \param buf Buffer + * \return The buffer index, or -1 if the buffer has not been added. + */ + int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf); + + /** + * Return true if there is enough memory in VRAM and GTT for the buffers + * added so far. If the validation fails, all buffers which have + * been added since the last call of cs_validate will be removed and + * the CS will be flushed (provided there are still any buffers). + * + * \param cs A command stream to validate. + */ + bool (*cs_validate)(struct radeon_cmdbuf *cs); + + /** + * Check whether the given number of dwords is available in the IB. + * Optionally chain a new chunk of the IB if necessary and supported. + * + * \param cs A command stream. + * \param dw Number of CS dwords requested by the caller. + * \param force_chaining Chain the IB into a new buffer now to discard + * the CP prefetch cache (to emulate PKT3_REWIND) + * \return true if there is enough space + */ + bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw, bool force_chaining); + + /** + * Return the buffer list. + * + * This is the buffer list as passed to the kernel, i.e. it only contains + * the parent buffers of sub-allocated buffers. + * + * \param cs Command stream + * \param list Returned buffer list. Set to NULL to query the count only. + * \return The buffer count. + */ + unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list); + + /** + * Flush a command stream. + * + * \param cs A command stream to flush. + * \param flags, PIPE_FLUSH_* flags. + * \param fence Pointer to a fence. If non-NULL, a fence is inserted + * after the CS and is returned through this parameter. + * \return Negative POSIX error code or 0 for success. + * Asynchronous submissions never return an error. + */ + int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence); + + /** + * Create a fence before the CS is flushed. + * The user must flush manually to complete the initializaton of the fence. + * + * The fence must not be used for anything except \ref cs_add_fence_dependency + * before the flush. + */ + struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs); + + /** + * Return true if a buffer is referenced by a command stream. + * + * \param cs A command stream. + * \param buf A winsys buffer. + */ + bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer *buf, + enum radeon_bo_usage usage); + + /** + * Request access to a feature for a command stream. + * + * \param cs A command stream. + * \param fid Feature ID, one of RADEON_FID_* + * \param enable Whether to enable or disable the feature. + */ + bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable); + /** + * Make sure all asynchronous flush of the cs have completed + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct radeon_cmdbuf *cs); + + /** + * Add a fence dependency to the CS, so that the CS will wait for + * the fence before execution. + * + * \param dependency_flags Bitmask of RADEON_DEPENDENCY_* + */ + void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence, + unsigned dependency_flags); + + /** + * Signal a syncobj when the CS finishes execution. + */ + void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence); + + /** + * Wait for the fence and return true if the fence has been signalled. + * The timeout of 0 will only return the status. + * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence + * is signalled. + */ + bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout); + + /** + * Reference counting for fences. + */ + void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src); + + /** + * Create a new fence object corresponding to the given syncobj fd. + */ + struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd); + + /** + * Create a new fence object corresponding to the given sync_file. + */ + struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd); + + /** + * Return a sync_file FD corresponding to the given fence object. + */ + int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence); + + /** + * Return a sync file FD that is already signalled. + */ + int (*export_signalled_sync_file)(struct radeon_winsys *ws); + + /** + * Initialize surface + * + * \param ws The winsys this function is called from. + * \param tex Input texture description + * \param flags Bitmask of RADEON_SURF_* flags + * \param bpe Bytes per pixel, it can be different for Z buffers. + * \param mode Preferred tile mode. (linear, 1D, or 2D) + * \param surf Output structure + */ + int (*surface_init)(struct radeon_winsys *ws, const struct pipe_resource *tex, unsigned flags, + unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf); + + uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); + + bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, + uint32_t *out); }; static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw) { - return cs && (cs->prev_dw + cs->current.cdw > num_dw); + return cs && (cs->prev_dw + cs->current.cdw > num_dw); } static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value) { - cs->current.buf[cs->current.cdw++] = value; + cs->current.buf[cs->current.cdw++] = value; } -static inline void radeon_emit_array(struct radeon_cmdbuf *cs, - const uint32_t *values, unsigned count) +static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, + unsigned count) { - memcpy(cs->current.buf + cs->current.cdw, values, count * 4); - cs->current.cdw += count; + memcpy(cs->current.buf + cs->current.cdw, values, count * 4); + cs->current.cdw += count; } -enum radeon_heap { - RADEON_HEAP_VRAM_NO_CPU_ACCESS, - RADEON_HEAP_VRAM_READ_ONLY, - RADEON_HEAP_VRAM_READ_ONLY_32BIT, - RADEON_HEAP_VRAM_32BIT, - RADEON_HEAP_VRAM, - RADEON_HEAP_GTT_WC, - RADEON_HEAP_GTT_WC_READ_ONLY, - RADEON_HEAP_GTT_WC_READ_ONLY_32BIT, - RADEON_HEAP_GTT_WC_32BIT, - RADEON_HEAP_GTT, - RADEON_MAX_SLAB_HEAPS, - RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS, +enum radeon_heap +{ + RADEON_HEAP_VRAM_NO_CPU_ACCESS, + RADEON_HEAP_VRAM_READ_ONLY, + RADEON_HEAP_VRAM_READ_ONLY_32BIT, + RADEON_HEAP_VRAM_32BIT, + RADEON_HEAP_VRAM, + RADEON_HEAP_GTT_WC, + RADEON_HEAP_GTT_WC_READ_ONLY, + RADEON_HEAP_GTT_WC_READ_ONLY_32BIT, + RADEON_HEAP_GTT_WC_32BIT, + RADEON_HEAP_GTT, + RADEON_MAX_SLAB_HEAPS, + RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS, }; static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap heap) { - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_VRAM: - return RADEON_DOMAIN_VRAM; - case RADEON_HEAP_GTT_WC: - case RADEON_HEAP_GTT_WC_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - case RADEON_HEAP_GTT: - return RADEON_DOMAIN_GTT; - default: - assert(0); - return (enum radeon_bo_domain)0; - } + switch (heap) { + case RADEON_HEAP_VRAM_NO_CPU_ACCESS: + case RADEON_HEAP_VRAM_READ_ONLY: + case RADEON_HEAP_VRAM_READ_ONLY_32BIT: + case RADEON_HEAP_VRAM_32BIT: + case RADEON_HEAP_VRAM: + return RADEON_DOMAIN_VRAM; + case RADEON_HEAP_GTT_WC: + case RADEON_HEAP_GTT_WC_READ_ONLY: + case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: + case RADEON_HEAP_GTT_WC_32BIT: + case RADEON_HEAP_GTT: + return RADEON_DOMAIN_GTT; + default: + assert(0); + return (enum radeon_bo_domain)0; + } } static inline unsigned radeon_flags_from_heap(enum radeon_heap heap) { - unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | - (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0); - - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - return flags | - RADEON_FLAG_NO_CPU_ACCESS; - - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY: - return flags | - RADEON_FLAG_READ_ONLY; - - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - return flags | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT; - - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - return flags | - RADEON_FLAG_32BIT; - - case RADEON_HEAP_VRAM: - case RADEON_HEAP_GTT_WC: - case RADEON_HEAP_GTT: - default: - return flags; - } + unsigned flags = + RADEON_FLAG_NO_INTERPROCESS_SHARING | (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0); + + switch (heap) { + case RADEON_HEAP_VRAM_NO_CPU_ACCESS: + return flags | RADEON_FLAG_NO_CPU_ACCESS; + + case RADEON_HEAP_VRAM_READ_ONLY: + case RADEON_HEAP_GTT_WC_READ_ONLY: + return flags | RADEON_FLAG_READ_ONLY; + + case RADEON_HEAP_VRAM_READ_ONLY_32BIT: + case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: + return flags | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT; + + case RADEON_HEAP_VRAM_32BIT: + case RADEON_HEAP_GTT_WC_32BIT: + return flags | RADEON_FLAG_32BIT; + + case RADEON_HEAP_VRAM: + case RADEON_HEAP_GTT_WC: + case RADEON_HEAP_GTT: + default: + return flags; + } } /* Return the heap index for winsys allocators, or -1 on failure. */ -static inline int radeon_get_heap_index(enum radeon_bo_domain domain, - enum radeon_bo_flag flags) +static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags) { - /* VRAM implies WC (write combining) */ - assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); - /* NO_CPU_ACCESS implies VRAM only. */ - assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); - - /* Resources with interprocess sharing don't use any winsys allocators. */ - if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) - return -1; - - /* Unsupported flags: NO_SUBALLOC, SPARSE. */ - if (flags & ~(RADEON_FLAG_GTT_WC | - RADEON_FLAG_NO_CPU_ACCESS | - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) - return -1; - - switch (domain) { - case RADEON_DOMAIN_VRAM: - switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) { - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY: - assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT: - assert(!"NO_CPU_ACCESS with 32BIT is disallowed"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS: - return RADEON_HEAP_VRAM_NO_CPU_ACCESS; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_READ_ONLY_32BIT; - case RADEON_FLAG_READ_ONLY: - return RADEON_HEAP_VRAM_READ_ONLY; - case RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_32BIT; - case 0: - return RADEON_HEAP_VRAM; - } - break; - case RADEON_DOMAIN_GTT: - switch (flags & (RADEON_FLAG_GTT_WC | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) { - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY: - return RADEON_HEAP_GTT_WC_READ_ONLY; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: - return RADEON_HEAP_GTT_WC_32BIT; - case RADEON_FLAG_GTT_WC: - return RADEON_HEAP_GTT_WC; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_READ_ONLY: - assert(!"READ_ONLY without WC is disallowed"); - return -1; - case RADEON_FLAG_32BIT: - assert(!"32BIT without WC is disallowed"); - return -1; - case 0: - return RADEON_HEAP_GTT; - } - break; - default: - break; - } - return -1; + /* VRAM implies WC (write combining) */ + assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); + /* NO_CPU_ACCESS implies VRAM only. */ + assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); + + /* Resources with interprocess sharing don't use any winsys allocators. */ + if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) + return -1; + + /* Unsupported flags: NO_SUBALLOC, SPARSE. */ + if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) + return -1; + + switch (domain) { + case RADEON_DOMAIN_VRAM: + switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY: + assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense"); + return -1; + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT: + assert(!"NO_CPU_ACCESS with 32BIT is disallowed"); + return -1; + case RADEON_FLAG_NO_CPU_ACCESS: + return RADEON_HEAP_VRAM_NO_CPU_ACCESS; + case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + return RADEON_HEAP_VRAM_READ_ONLY_32BIT; + case RADEON_FLAG_READ_ONLY: + return RADEON_HEAP_VRAM_READ_ONLY; + case RADEON_FLAG_32BIT: + return RADEON_HEAP_VRAM_32BIT; + case 0: + return RADEON_HEAP_VRAM; + } + break; + case RADEON_DOMAIN_GTT: + switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { + case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; + case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY: + return RADEON_HEAP_GTT_WC_READ_ONLY; + case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: + return RADEON_HEAP_GTT_WC_32BIT; + case RADEON_FLAG_GTT_WC: + return RADEON_HEAP_GTT_WC; + case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + case RADEON_FLAG_READ_ONLY: + assert(!"READ_ONLY without WC is disallowed"); + return -1; + case RADEON_FLAG_32BIT: + assert(!"32BIT without WC is disallowed"); + return -1; + case 0: + return RADEON_HEAP_GTT; + } + break; + default: + break; + } + return -1; } #endif |