From 638cb29d7579ad76ea358ee30ab5e6069ee796c3 Mon Sep 17 00:00:00 2001
From: John Stebbins
Date: Thu, 12 Jun 2014 15:24:26 -0700
Subject: [PATCH] pgssubdec: handle more complex PGS scenarios

Add ability to handle multiple palettes and objects simultaneously.
Each simultaneous object is given its own AVSubtitleRect. Note that
there can be up to 64 currently valid objects, but only 2 at any one
time can be "presented".
---
 libavcodec/pgssubdec.c | 424 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 294 insertions(+), 130 deletions(-)

diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index 4f175a3..1cafd9f 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -33,38 +33,104 @@
 #include "libavutil/imgutils.h"

 #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+#define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
+#define MAX_EPOCH_OBJECTS  64  // Max 64 allowed per PGS epoch
+#define MAX_OBJECT_REFS    2   // Max objects per display set

 enum SegmentType {
     PALETTE_SEGMENT      = 0x14,
-    PICTURE_SEGMENT      = 0x15,
+    OBJECT_SEGMENT       = 0x15,
     PRESENTATION_SEGMENT = 0x16,
     WINDOW_SEGMENT       = 0x17,
     DISPLAY_SEGMENT      = 0x80,
 };

+typedef struct PGSSubObjectRef {
+    int     id;
+    int     window_id;
+    uint8_t composition_flag;
+    int     x;
+    int     y;
+    int     crop_x;
+    int     crop_y;
+    int     crop_w;
+    int     crop_h;
+} PGSSubObjectRef;
+
 typedef struct PGSSubPresentation {
-    int x;
-    int y;
     int id_number;
-    int object_number;
-    uint8_t composition_flag;
+    int palette_id;
+    int object_count;
+    PGSSubObjectRef objects[MAX_OBJECT_REFS];
     int64_t pts;
 } PGSSubPresentation;

-typedef struct PGSSubPicture {
+typedef struct PGSSubObject {
+    int          id;
     int          w;
     int          h;
     uint8_t      *rle;
     unsigned int rle_buffer_size, rle_data_len;
     unsigned int rle_remaining_len;
-} PGSSubPicture;
+} PGSSubObject;
+
+typedef struct PGSSubObjects {
+    int          count;
+    PGSSubObject object[MAX_EPOCH_OBJECTS];
+} PGSSubObjects;
+
+typedef struct PGSSubPalette {
+    int      id;
+    uint32_t clut[256];
+} PGSSubPalette;
+
+typedef struct PGSSubPalettes {
+    int           count;
+    PGSSubPalette palette[MAX_EPOCH_PALETTES];
+} PGSSubPalettes;

 typedef struct PGSSubContext {
     PGSSubPresentation presentation;
-    uint32_t           clut[256];
-    PGSSubPicture      picture;
+    PGSSubPalettes     palettes;
+    PGSSubObjects      objects;
 } PGSSubContext;

+static void flush_cache(AVCodecContext *avctx)
+{
+    PGSSubContext *ctx = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < ctx->objects.count; i++) {
+        av_freep(&ctx->objects.object[i].rle);
+        ctx->objects.object[i].rle_buffer_size   = 0;
+        ctx->objects.object[i].rle_remaining_len = 0;
+    }
+    ctx->objects.count = 0;
+    ctx->palettes.count = 0;
+}
+
+static PGSSubObject * find_object(int id, PGSSubObjects *objects)
+{
+    int i;
+
+    for (i = 0; i < objects->count; i++) {
+        if (objects->object[i].id == id)
+            return &objects->object[i];
+    }
+    return NULL;
+}
+
+static PGSSubPalette * find_palette(int id, PGSSubPalettes *palettes)
+{
+    int i;
+
+    for (i = 0; i < palettes->count; i++) {
+        if (palettes->palette[i].id == id)
+            return &palettes->palette[i];
+    }
+    return NULL;
+}
+
 static av_cold int init_decoder(AVCodecContext *avctx)
 {
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
@@ -74,10 +140,7 @@ static av_cold int init_decoder(AVCodecContext *avctx)

 static av_cold int close_decoder(AVCodecContext *avctx)
 {
-    PGSSubContext *ctx = avctx->priv_data;
-
-    av_freep(&ctx->picture.rle);
-    ctx->picture.rle_buffer_size = 0;
+    flush_cache(avctx);

     return 0;
 }
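Note: the epoch caches introduced above (PGSSubPalettes/PGSSubObjects) are plain fixed-size arrays that are searched linearly by id and emptied by flush_cache() at epoch boundaries. A minimal standalone sketch of that lookup-or-allocate pattern, using hypothetical names that are not part of the patch:

    /* Illustration only: the lookup-or-allocate pattern used by the epoch
     * caches (cf. find_object()/parse_object_segment()). Hypothetical types. */
    #include <stddef.h>

    #define MAX_ITEMS 64

    typedef struct Item  { int id; int w, h; } Item;
    typedef struct Cache { int count; Item item[MAX_ITEMS]; } Cache;

    static Item *cache_get(Cache *c, int id)
    {
        int i;
        for (i = 0; i < c->count; i++)      /* linear search by id */
            if (c->item[i].id == id)
                return &c->item[i];
        if (c->count >= MAX_ITEMS)          /* per-epoch limit reached */
            return NULL;
        c->item[c->count].id = id;          /* claim a new slot */
        return &c->item[c->count++];
    }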
@@ -92,7 +155,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
  * @param buf pointer to the RLE data to process
  * @param buf_size size of the RLE data to process
  */
-static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
+static int decode_rle(AVCodecContext *avctx, AVSubtitleRect *rect,
                       const uint8_t *buf, unsigned int buf_size)
 {
     const uint8_t *rle_bitmap_end;
@@ -100,15 +163,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,

     rle_bitmap_end = buf + buf_size;

-    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * sub->rects[0]->h);
+    rect->pict.data[0] = av_malloc(rect->w * rect->h);

-    if (!sub->rects[0]->pict.data[0])
-        return -1;
+    if (!rect->pict.data[0])
+        return AVERROR(ENOMEM);

     pixel_count = 0;
     line_count  = 0;

-    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
+    while (buf < rle_bitmap_end && line_count < rect->h) {
         uint8_t flags, color;
         int run;

@@ -123,27 +186,31 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
             color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
         }

-        if (run > 0 && pixel_count + run <= sub->rects[0]->w * sub->rects[0]->h) {
-            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
+        if (run > 0 && pixel_count + run <= rect->w * rect->h) {
+            memset(rect->pict.data[0] + pixel_count, color, run);
             pixel_count += run;
         } else if (!run) {
             /*
              * New Line. Check if correct pixels decoded, if not display warning
              * and adjust bitmap pointer to correct new line position.
              */
-            if (pixel_count % sub->rects[0]->w > 0)
+            if (pixel_count % rect->w > 0) {
                 av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line should be %d pixels\n",
-                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
+                       pixel_count % rect->w, rect->w);
+                if (avctx->err_recognition & AV_EF_EXPLODE) {
+                    return AVERROR_INVALIDDATA;
+                }
+            }
             line_count++;
         }
     }

-    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
+    if (pixel_count < rect->w * rect->h) {
         av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }

-    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[0]->w * sub->rects[0]->h);
+    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, rect->w * rect->h);

     return 0;
 }
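Note: decode_rle() consumes the bitmap one code word at a time: a non-zero byte is a single pixel, while a zero byte escapes into a run code whose flag bits select a 6- or 14-bit run length and an optional explicit colour; a zero-length run marks the end of a line. A hedged restatement of that grammar as a standalone helper (illustration only, not part of the patch):

    /* Illustration only: one PGS RLE code word as handled by decode_rle().
     * Returns bytes consumed; *color/*run receive one decoded run, and
     * *run == 0 signals end-of-line. */
    #include <stdint.h>

    static int pgs_rle_code(const uint8_t *buf, uint8_t *color, int *run)
    {
        const uint8_t *start = buf;

        *color = *buf++;
        *run   = 1;
        if (*color == 0x00) {                     /* 0x00 escapes into a run code */
            uint8_t flags = *buf++;
            *run   = flags & 0x3f;
            if (flags & 0x40)                     /* extended 14 bit run length */
                *run = (*run << 8) + *buf++;
            *color = (flags & 0x80) ? *buf++ : 0; /* explicit colour, else colour 0 */
        }
        return (int)(buf - start);                /* "0x00 0x00" => *run == 0: new line */
    }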
@@ -157,40 +224,52 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
  * @param avctx contains the current codec context
  * @param buf pointer to the packet to process
  * @param buf_size size of packet to process
- * @todo TODO: Enable support for RLE data over multiple packets
  */
-static int parse_picture_segment(AVCodecContext *avctx,
+static int parse_object_segment(AVCodecContext *avctx,
                                  const uint8_t *buf, int buf_size)
 {
     PGSSubContext *ctx = avctx->priv_data;
+    PGSSubObject *object;

     uint8_t sequence_desc;
     unsigned int rle_bitmap_len, width, height;
+    int id;

     if (buf_size <= 4)
-        return -1;
+        return AVERROR_INVALIDDATA;
     buf_size -= 4;

-    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
-    buf += 3;
+    id = bytestream_get_be16(&buf);
+    object = find_object(id, &ctx->objects);
+    if (!object) {
+        if (ctx->objects.count >= MAX_EPOCH_OBJECTS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many objects in epoch\n");
+            return AVERROR_INVALIDDATA;
+        }
+        object = &ctx->objects.object[ctx->objects.count++];
+        object->id = id;
+    }
+
+    /* skip object version number */
+    buf += 1;

     /* Read the Sequence Description to determine if start of RLE data or
      * appended to previous RLE */
     sequence_desc = bytestream_get_byte(&buf);

     if (!(sequence_desc & 0x80)) {
         /* Additional RLE data */
-        if (buf_size > ctx->picture.rle_remaining_len)
-            return -1;
+        if (buf_size > object->rle_remaining_len)
+            return AVERROR_INVALIDDATA;

-        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size);
-        ctx->picture.rle_data_len += buf_size;
-        ctx->picture.rle_remaining_len -= buf_size;
+        memcpy(object->rle + object->rle_data_len, buf, buf_size);
+        object->rle_data_len += buf_size;
+        object->rle_remaining_len -= buf_size;

         return 0;
     }

     if (buf_size <= 7)
-        return -1;
+        return AVERROR_INVALIDDATA;
     buf_size -= 7;

     /* Decode rle bitmap length, stored size includes width/height data */
@@ -203,20 +282,20 @@ static int parse_picture_segment(AVCodecContext *avctx,
     /* Make sure the bitmap is not too large */
     if (avctx->width < width || avctx->height < height) {
         av_log(avctx, AV_LOG_ERROR, "Bitmap dimensions larger than video.\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }

-    ctx->picture.w = width;
-    ctx->picture.h = height;
+    object->w = width;
+    object->h = height;

-    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, rle_bitmap_len);
+    av_fast_malloc(&object->rle, &object->rle_buffer_size, rle_bitmap_len);

-    if (!ctx->picture.rle)
-        return -1;
+    if (!object->rle)
+        return AVERROR(ENOMEM);

-    memcpy(ctx->picture.rle, buf, buf_size);
-    ctx->picture.rle_data_len = buf_size;
-    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
+    memcpy(object->rle, buf, buf_size);
+    object->rle_data_len = buf_size;
+    object->rle_remaining_len = rle_bitmap_len - buf_size;

     return 0;
 }
@@ -231,19 +310,32 @@ static int parse_picture_segment(AVCodecContext *avctx,
  * @param buf pointer to the packet to process
  * @param buf_size size of packet to process
  */
-static void parse_palette_segment(AVCodecContext *avctx,
+static int parse_palette_segment(AVCodecContext *avctx,
                                   const uint8_t *buf, int buf_size)
 {
     PGSSubContext *ctx = avctx->priv_data;
+    PGSSubPalette *palette;

     const uint8_t *buf_end = buf + buf_size;
     const uint8_t *cm      = ff_cropTbl + MAX_NEG_CROP;
     int color_id;
     int y, cb, cr, alpha;
     int r, g, b, r_add, g_add, b_add;
+    int id;
+
+    id = bytestream_get_byte(&buf);
+    palette = find_palette(id, &ctx->palettes);
+    if (!palette) {
+        if (ctx->palettes.count >= MAX_EPOCH_PALETTES) {
+            av_log(avctx, AV_LOG_ERROR, "Too many palettes in epoch\n");
+            return AVERROR_INVALIDDATA;
+        }
+        palette = &ctx->palettes.palette[ctx->palettes.count++];
+        palette->id = id;
+    }

-    /* Skip two null bytes */
-    buf += 2;
+    /* Skip palette version */
+    buf += 1;

     while (buf < buf_end) {
         color_id  = bytestream_get_byte(&buf);
@@ -258,8 +350,9 @@ static void parse_palette_segment(AVCodecContext *avctx,
         av_dlog(avctx, "Color %d := (%d,%d,%d,%d)\n", color_id, r, g, b, alpha);

         /* Store color in palette */
-        ctx->clut[color_id] = RGBA(r,g,b,alpha);
+        palette->clut[color_id] = RGBA(r,g,b,alpha);
     }
+    return 0;
 }
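Note: for orientation, the object-segment layout that parse_object_segment() walks can be summarised as below; this is reconstructed from the parsing code above, not quoted from a specification:

    /* Illustration only: object ("ODS") payload layout implied by
     * parse_object_segment(). Offsets are relative to the segment body.
     *
     *   +0  object id            (be16)  -> find_object()/new cache slot
     *   +2  object version       (1 byte, skipped)
     *   +3  sequence descriptor  (1 byte; bit 7 set = first fragment)
     *   -- first fragment only --
     *   +4  rle bitmap length    (be24, includes the 4 width/height bytes)
     *   +7  width                (be16)
     *   +9  height               (be16)
     *   +11 RLE data ...
     *   -- continuation fragments: RLE data resumes at +4 and is appended
     *      until rle_remaining_len reaches 0 --
     */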
@@ -272,7 +365,6 @@ static void parse_palette_segment(AVCodecContext *avctx,
  * @param buf pointer to the packet to process
  * @param buf_size size of packet to process
  * @todo TODO: Implement cropping
- * @todo TODO: Implement forcing of subtitles
  */
 static int parse_presentation_segment(AVCodecContext *avctx,
                                       const uint8_t *buf, int buf_size,
@@ -280,8 +372,9 @@ static int parse_presentation_segment(AVCodecContext *avctx,
 {
     PGSSubContext *ctx = avctx->priv_data;

-    int x, y, ret;
+    int i, state, ret;
+
+    // Video descriptor
     int w = bytestream_get_be16(&buf);
     int h = bytestream_get_be16(&buf);

@@ -293,49 +386,75 @@ static int parse_presentation_segment(AVCodecContext *avctx,
     if (ret < 0)
         return ret;

-    /* Skip 1 bytes of unknown, frame rate? */
+    /* Skip 1 byte of unknown, frame rate */
     buf++;

+    // Composition descriptor
     ctx->presentation.id_number = bytestream_get_be16(&buf);
-
     /*
-     * Skip 3 bytes of unknown:
-     *     state
-     *     palette_update_flag (0x80),
-     *     palette_id_to_use,
+     * state is a 2 bit field that defines pgs epoch boundaries
+     * 00 - Normal, previously defined objects and palettes are still valid
+     * 01 - Acquisition point, previous objects and palettes can be released
+     * 10 - Epoch start, previous objects and palettes can be released
+     * 11 - Epoch continue, previous objects and palettes can be released
+     *
+     * reserved 6 bits discarded
      */
-    buf += 3;
-
-    ctx->presentation.object_number = bytestream_get_byte(&buf);
-    ctx->presentation.composition_flag = 0;
-    if (!ctx->presentation.object_number)
-        return 0;
+    state = bytestream_get_byte(&buf) >> 6;
+    if (state != 0) {
+        flush_cache(avctx);
+    }

     /*
-     * Skip 3 bytes of unknown:
-     *     object_id_ref (2 bytes),
-     *     window_id_ref,
+     * skip palette_update_flag (0x80),
      */
-    buf += 3;
-    ctx->presentation.composition_flag = bytestream_get_byte(&buf);
-
-    x = bytestream_get_be16(&buf);
-    y = bytestream_get_be16(&buf);
-
-    /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
+    buf += 1;
+    ctx->presentation.palette_id = bytestream_get_byte(&buf);
+    ctx->presentation.object_count = bytestream_get_byte(&buf);
+    if (ctx->presentation.object_count > MAX_OBJECT_REFS) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Invalid number of presentation objects %d\n",
+               ctx->presentation.object_count);
+        ctx->presentation.object_count = 2;
+        if (avctx->err_recognition & AV_EF_EXPLODE) {
+            return AVERROR_INVALIDDATA;
+        }
+    }

-    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
+    for (i = 0; i < ctx->presentation.object_count; i++)
+    {
+        ctx->presentation.objects[i].id = bytestream_get_be16(&buf);
+        ctx->presentation.objects[i].window_id = bytestream_get_byte(&buf);
+        ctx->presentation.objects[i].composition_flag = bytestream_get_byte(&buf);
+
+        ctx->presentation.objects[i].x = bytestream_get_be16(&buf);
+        ctx->presentation.objects[i].y = bytestream_get_be16(&buf);
+
+        // If cropping
+        if (ctx->presentation.objects[i].composition_flag & 0x80) {
+            ctx->presentation.objects[i].crop_x = bytestream_get_be16(&buf);
+            ctx->presentation.objects[i].crop_y = bytestream_get_be16(&buf);
+            ctx->presentation.objects[i].crop_w = bytestream_get_be16(&buf);
+            ctx->presentation.objects[i].crop_h = bytestream_get_be16(&buf);
+        }

-    if (x > avctx->width || y > avctx->height) {
-        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
-               x, y, avctx->width, avctx->height);
-        x = 0; y = 0;
+        av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n",
+                ctx->presentation.objects[i].x, ctx->presentation.objects[i].y);
+
+        if (ctx->presentation.objects[i].x > avctx->width ||
+            ctx->presentation.objects[i].y > avctx->height) {
+            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
+                   ctx->presentation.objects[i].x,
+                   ctx->presentation.objects[i].y,
+                   avctx->width, avctx->height);
+            ctx->presentation.objects[i].x = 0;
+            ctx->presentation.objects[i].y = 0;
+            if (avctx->err_recognition & AV_EF_EXPLODE) {
+                return AVERROR_INVALIDDATA;
+            }
+        }
     }

-    /* Fill in dimensions */
-    ctx->presentation.x = x;
-    ctx->presentation.y = y;
-
     return 0;
 }
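Note: each object reference read by the loop above occupies a fixed slice of the presentation composition segment; summarised from the parsing code (not from a specification):

    /* Illustration only: one object reference in the presentation
     * composition segment, as read by parse_presentation_segment().
     *
     *   object id          (be16)
     *   window id          (1 byte)
     *   composition flag   (1 byte; 0x80 = cropped, 0x40 = forced)
     *   x, y               (be16 each)
     *   crop x/y/w/h       (be16 each, present only when 0x80 is set)
     */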
@@ -348,64 +467,105 @@ static int parse_presentation_segment(AVCodecContext *avctx,
  * @param data pointer to the data pertaining the subtitle to display
  * @param buf pointer to the packet to process
  * @param buf_size size of packet to process
- * @todo TODO: Fix start time, relies on correct PTS, currently too late
- *
- * @todo TODO: Fix end time, normally cleared by a second display
- * @todo       segment, which is currently ignored as it clears
- * @todo       the subtitle too early.
  */
 static int display_end_segment(AVCodecContext *avctx, void *data,
                                const uint8_t *buf, int buf_size)
 {
     AVSubtitle    *sub = data;
     PGSSubContext *ctx = avctx->priv_data;
-
-    /*
-     * The end display time is a timeout value and is only reached
-     * if the next subtitle is later then timeout or subtitle has
-     * not been cleared by a subsequent empty display command.
-     */
+    PGSSubPalette *palette;
+    int i, ret;

     memset(sub, 0, sizeof(*sub));
     sub->pts = ctx->presentation.pts;
-
-    // Blank if last object_number was 0.
-    // Note that this may be wrong for more complex subtitles.
-    if (!ctx->presentation.object_number)
-        return 1;
     sub->start_display_time = 0;
-    sub->end_display_time   = 20000;
+    // There is no explicit end time for PGS subtitles. The end time
+    // is defined by the start of the next sub which may contain no
+    // objects (i.e. clears the previous sub)
+    sub->end_display_time   = UINT32_MAX;
     sub->format             = 0;

-    sub->rects     = av_mallocz(sizeof(*sub->rects));
-    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
-    sub->num_rects = 1;
-
-    if (ctx->presentation.composition_flag & 0x40)
-        sub->rects[0]->flags |= AV_SUBTITLE_FLAG_FORCED;
+    // Blank if last object_count was 0.
+    if (!ctx->presentation.object_count)
+        return 1;
+    sub->rects = av_mallocz(sizeof(*sub->rects) * ctx->presentation.object_count);
+    if (!sub->rects) {
+        return AVERROR(ENOMEM);
+    }
+    palette = find_palette(ctx->presentation.palette_id, &ctx->palettes);
+    if (!palette) {
+        // Missing palette.  Should only happen with damaged streams.
+        av_log(avctx, AV_LOG_ERROR, "Invalid palette id %d\n",
+               ctx->presentation.palette_id);
+        avsubtitle_free(sub);
+        return AVERROR_INVALIDDATA;
+    }
+    for (i = 0; i < ctx->presentation.object_count; i++) {
+        PGSSubObject *object;

-    sub->rects[0]->x    = ctx->presentation.x;
-    sub->rects[0]->y    = ctx->presentation.y;
-    sub->rects[0]->w    = ctx->picture.w;
-    sub->rects[0]->h    = ctx->picture.h;
-    sub->rects[0]->type = SUBTITLE_BITMAP;
+        sub->rects[i]  = av_mallocz(sizeof(*sub->rects[0]));
+        if (!sub->rects[i]) {
+            avsubtitle_free(sub);
+            return AVERROR(ENOMEM);
+        }
+        sub->num_rects++;
+        sub->rects[i]->type = SUBTITLE_BITMAP;
+
+        /* Process bitmap */
+        object = find_object(ctx->presentation.objects[i].id, &ctx->objects);
+        if (!object) {
+            // Missing object.  Should only happen with damaged streams.
+            av_log(avctx, AV_LOG_ERROR, "Invalid object id %d\n",
+                   ctx->presentation.objects[i].id);
+            if (avctx->err_recognition & AV_EF_EXPLODE) {
+                avsubtitle_free(sub);
+                return AVERROR_INVALIDDATA;
+            }
+            // Leaves rect empty with 0 width and height.
+            continue;
+        }
+        if (ctx->presentation.objects[i].composition_flag & 0x40)
+            sub->rects[i]->flags |= AV_SUBTITLE_FLAG_FORCED;
+
+        sub->rects[i]->x    = ctx->presentation.objects[i].x;
+        sub->rects[i]->y    = ctx->presentation.objects[i].y;
+        sub->rects[i]->w    = object->w;
+        sub->rects[i]->h    = object->h;
+
+        sub->rects[i]->pict.linesize[0] = object->w;
+
+        if (object->rle) {
+            if (object->rle_remaining_len) {
+                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
+                       object->rle_data_len, object->rle_remaining_len);
+                if (avctx->err_recognition & AV_EF_EXPLODE) {
+                    avsubtitle_free(sub);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+            ret = decode_rle(avctx, sub->rects[i], object->rle, object->rle_data_len);
+            if (ret < 0) {
+                if ((avctx->err_recognition & AV_EF_EXPLODE) ||
+                    ret == AVERROR(ENOMEM)) {
+                    avsubtitle_free(sub);
+                    return ret;
+                }
+                sub->rects[i]->w = 0;
+                sub->rects[i]->h = 0;
+                continue;
+            }
+        }
+        /* Allocate memory for colors */
+        sub->rects[i]->nb_colors    = 256;
+        sub->rects[i]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
+        if (!sub->rects[i]->pict.data[1]) {
+            avsubtitle_free(sub);
+            return AVERROR(ENOMEM);
+        }

-    /* Process bitmap */
-    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
+        memcpy(sub->rects[i]->pict.data[1], palette->clut, sub->rects[i]->nb_colors * sizeof(uint32_t));

-    if (ctx->picture.rle) {
-        if (ctx->picture.rle_remaining_len)
-            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
-                   ctx->picture.rle_data_len, ctx->picture.rle_remaining_len);
-        if(decode_rle(avctx, sub, ctx->picture.rle, ctx->picture.rle_data_len) < 0)
-            return 0;
     }
-    /* Allocate memory for colors */
-    sub->rects[0]->nb_colors    = 256;
-    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-
-    memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors * sizeof(uint32_t));
-
     return 1;
 }
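Note: the rects produced here are PAL8: pict.data[0] holds palette indices with pitch pict.linesize[0], and pict.data[1] holds the 256-entry RGBA clut copied from the selected palette. A hypothetical consumer could resolve one pixel like this (illustration only; field names follow the AVSubtitleRect API version used by this patch):

    /* Illustration only: read pixel (x, y) of one PAL8 rect produced by
     * display_end_segment(). */
    #include <stdint.h>
    #include <libavcodec/avcodec.h>

    static uint32_t rect_pixel_rgba(const AVSubtitleRect *r, int x, int y)
    {
        const uint8_t  *idx  = r->pict.data[0];
        const uint32_t *clut = (const uint32_t *)r->pict.data[1];

        return clut[idx[y * r->pict.linesize[0] + x]]; /* 0xAARRGGBB, cf. RGBA() */
    }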
@@ -449,17 +609,16 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
         if (segment_type != DISPLAY_SEGMENT && segment_length > buf_end - buf)
             break;

+        ret = 0;
         switch (segment_type) {
         case PALETTE_SEGMENT:
-            parse_palette_segment(avctx, buf, segment_length);
+            ret = parse_palette_segment(avctx, buf, segment_length);
             break;
-        case PICTURE_SEGMENT:
-            parse_picture_segment(avctx, buf, segment_length);
+        case OBJECT_SEGMENT:
+            ret = parse_object_segment(avctx, buf, segment_length);
             break;
         case PRESENTATION_SEGMENT:
             ret = parse_presentation_segment(avctx, buf, segment_length, avpkt->pts);
-            if (ret < 0)
-                return ret;
             break;
         case WINDOW_SEGMENT:
             /*
@@ -472,13 +631,18 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
              */
             break;
         case DISPLAY_SEGMENT:
-            *data_size = display_end_segment(avctx, data, buf, segment_length);
+            ret = display_end_segment(avctx, data, buf, segment_length);
+            if (ret >= 0)
+                *data_size = ret;
             break;
         default:
             av_log(avctx, AV_LOG_ERROR, "Unknown subtitle segment type 0x%x, length %d\n",
                    segment_type, segment_length);
+            ret = AVERROR_INVALIDDATA;
             break;
         }
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
+            return ret;

         buf += segment_length;
     }
-- 
1.9.3
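Note: the decode() loop now records a per-segment status and only turns it into a hard failure when the caller opted into strict error checking. A sketch of how an application might request that behaviour (illustrative; assumes the usual libavcodec setup calls of this era and only minimal cleanup):

    /* Illustration only: open the PGS decoder with AV_EF_EXPLODE so damaged
     * segments abort decoding instead of being logged and skipped. */
    #include <libavcodec/avcodec.h>
    #include <libavutil/mem.h>

    static AVCodecContext *open_pgs_decoder(void)
    {
        AVCodec        *codec = avcodec_find_decoder(AV_CODEC_ID_HDMV_PGS_SUBTITLE);
        AVCodecContext *ctx   = codec ? avcodec_alloc_context3(codec) : NULL;

        if (!ctx)
            return NULL;
        ctx->err_recognition |= AV_EF_EXPLODE;  /* fail hard on invalid data */
        if (avcodec_open2(ctx, codec, NULL) < 0) {
            av_freep(&ctx);                     /* sketch: minimal cleanup */
            return NULL;
        }
        return ctx;
    }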