mirror of https://github.com/FFmpeg/FFmpeg.git
674 lines
24 KiB
674 lines
24 KiB
/*
 * NewTek SpeedHQ codec
 * Copyright 2017 Steinar H. Gunderson
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
/**
 * @file
 * NewTek SpeedHQ decoder.
 */
|
|
|
#define BITSTREAM_READER_LE |
|
|
|
#include "libavutil/attributes.h" |
|
#include "libavutil/mem_internal.h" |
|
|
|
#include "avcodec.h" |
|
#include "blockdsp.h" |
|
#include "codec_internal.h" |
|
#include "decode.h" |
|
#include "get_bits.h" |
|
#include "idctdsp.h" |
|
#include "libavutil/thread.h" |
|
#include "mathops.h" |
|
#include "mpeg12data.h" |
|
#include "mpeg12vlc.h" |
|
#include "speedhq.h" |
|
#include "thread.h" |
|
|
|
/* Highest valid coefficient position inside a 64-entry DCT block. */
#define MAX_INDEX (64 - 1)

/*
 * Root width of the alpha VLC tables.  Five bits keep the tables very
 * small while the longest (10-bit) codes still resolve in at most two
 * lookups.
 */
#define ALPHA_VLC_BITS 5
|
|
|
typedef struct SHQContext { |
|
BlockDSPContext bdsp; |
|
IDCTDSPContext idsp; |
|
uint8_t permutated_intra_scantable[64]; |
|
int quant_matrix[64]; |
|
enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 } |
|
subsampling; |
|
enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type; |
|
AVPacket *avpkt; |
|
uint32_t second_field_offset; |
|
} SHQContext; |
|
|
|
/*
 * Base quantisation matrix before it is multiplied by the per-frame
 * scale.  NOTE: The first element is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    16, 16, 19, 22, 26, 27, 29, 34,
    16, 16, 22, 24, 27, 29, 34, 37,
    19, 22, 26, 27, 29, 34, 34, 38,
    22, 22, 26, 27, 29, 34, 37, 40,
    22, 26, 27, 29, 32, 35, 40, 48,
    26, 27, 29, 32, 35, 40, 48, 58,
    26, 27, 29, 34, 38, 46, 56, 69,
    27, 29, 35, 38, 46, 56, 69, 83,
};
|
|
|
static VLCElem dc_lum_vlc_le[512]; |
|
static VLCElem dc_chroma_vlc_le[514]; |
|
static VLCElem dc_alpha_run_vlc_le[160]; |
|
static VLCElem dc_alpha_level_vlc_le[288]; |
|
|
|
static RL_VLC_ELEM speedhq_rl_vlc[674]; |
|
|
|
/**
 * Read one DC differential from the bitstream.
 *
 * Components 0 and 3 use the luma DC table, every other component uses
 * the chroma table.  The VLC yields the bit size of the differential;
 * a size of zero means a differential of zero and no further bits are read.
 *
 * @param gb        bit reader positioned at the DC code
 * @param component plane index selecting the VLC table
 * @return the decoded differential
 */
static inline int decode_dc_le(GetBitContext *gb, int component)
{
    const int luma_like  = component == 0 || component == 3;
    const VLCElem *table = luma_like ? dc_lum_vlc_le : dc_chroma_vlc_le;
    const int size       = get_vlc2(gb, table, DC_VLC_BITS, 2);

    return size ? get_xbits_le(gb, size) : 0;
}
|
|
|
/**
 * Decode one run-length coded 16x8 alpha block and write it to dest.
 *
 * The bitstream carries (run, level) pairs: run is the number of zero
 * deltas to skip, level is the delta stored at the resulting position.
 * A negative run symbol ends the block.  Each output row is produced by
 * subtracting that row's deltas from last_alpha, which is updated in
 * place and therefore carries over to the next row and the next block.
 *
 * @param s          decoder context (not used by this function)
 * @param gb         bit reader
 * @param last_alpha running 16-pixel predictor, updated in place
 * @param dest       top-left output pixel
 * @param linesize   distance in bytes between output rows
 * @return 0 on success, AVERROR_INVALIDDATA if a run leaves the block
 */
static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
{
    uint8_t deltas[128] = { 0 };
    int pos = 0;

    {
        OPEN_READER(re, gb);

        while (1) {
            int run, level;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(run, re, gb, dc_alpha_run_vlc_le, ALPHA_VLC_BITS, 2);

            /* A negative run symbol is the end-of-block marker. */
            if (run < 0)
                break;

            pos += run;
            if (pos >= 128)
                return AVERROR_INVALIDDATA;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(level, re, gb, dc_alpha_level_vlc_le, ALPHA_VLC_BITS, 2);
            deltas[pos] = level;
            pos++;
        }

        CLOSE_READER(re, gb);
    }

    for (int row = 0; row < 8; row++) {
        const uint8_t *row_deltas = deltas + 16 * row;

        for (int col = 0; col < 16; col++)
            last_alpha[col] -= row_deltas[col];

        memcpy(dest, last_alpha, 16);
        dest += linesize;
    }

    return 0;
}
|
|
|
static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize) |
|
{ |
|
const int *quant_matrix = s->quant_matrix; |
|
const uint8_t *scantable = s->permutated_intra_scantable; |
|
LOCAL_ALIGNED_32(int16_t, block, [64]); |
|
int dc_offset; |
|
|
|
s->bdsp.clear_block(block); |
|
|
|
dc_offset = decode_dc_le(gb, component); |
|
last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */ |
|
block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always 16. */ |
|
|
|
/* Read AC coefficients. */ |
|
{ |
|
int i = 0; |
|
OPEN_READER(re, gb); |
|
for ( ;; ) { |
|
int level, run; |
|
UPDATE_CACHE_LE(re, gb); |
|
GET_RL_VLC(level, run, re, gb, speedhq_rl_vlc, |
|
TEX_VLC_BITS, 2, 0); |
|
if (level == 127) { |
|
break; |
|
} else if (level) { |
|
i += run; |
|
if (i > MAX_INDEX) |
|
return AVERROR_INVALIDDATA; |
|
/* If next bit is 1, level = -level */ |
|
level = (level ^ SHOW_SBITS(re, gb, 1)) - |
|
SHOW_SBITS(re, gb, 1); |
|
LAST_SKIP_BITS(re, gb, 1); |
|
} else { |
|
/* Escape. */ |
|
#if MIN_CACHE_BITS < 6 + 6 + 12 |
|
#error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE |
|
#endif |
|
run = SHOW_UBITS(re, gb, 6) + 1; |
|
SKIP_BITS(re, gb, 6); |
|
level = SHOW_UBITS(re, gb, 12) - 2048; |
|
LAST_SKIP_BITS(re, gb, 12); |
|
|
|
i += run; |
|
if (i > MAX_INDEX) |
|
return AVERROR_INVALIDDATA; |
|
} |
|
|
|
block[scantable[i]] = (level * quant_matrix[i]) >> 4; |
|
} |
|
CLOSE_READER(re, gb); |
|
} |
|
|
|
s->idsp.idct_put(dest, linesize, block); |
|
|
|
return 0; |
|
} |
|
|
|
/**
 * Decode the extra macroblock column that starts at x = width - 8.
 *
 * decode_speedhq_field() calls this for 4:2:0 / 4:2:2 content whose width
 * is not a multiple of 16, continuing with the bit reader of slice 3.
 * One macroblock is decoded for every macroblock row of the field.
 *
 * @param s            decoder context
 * @param gb           bit reader to continue reading from
 * @param frame        destination frame
 * @param field_number line offset added to every row (0 or 1)
 * @param line_stride  1 for progressive, 2 for interlaced content
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride)
{
    const int has_alpha = s->alpha_type != SHQ_NO_ALPHA;
    const int stride_y  = frame->linesize[0] * line_stride;
    const int stride_cb = frame->linesize[1] * line_stride;
    const int stride_cr = frame->linesize[2] * line_stride;
    const int x         = frame->width - 8;
    int stride_a;
    int ret;

    if (has_alpha)
        stride_a = frame->linesize[3] * line_stride;

    for (int y = 0; y < frame->height; y += 16 * line_stride) {
        const int row = y + field_number;
        int last_dc[4] = { 1024, 1024, 1024, 1024 };
        uint8_t last_alpha[16];
        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
        int chroma_row;

        if (s->subsampling == SHQ_SUBSAMPLING_420) {
            chroma_row = y / 2 + field_number;
        } else {
            av_assert2(s->subsampling == SHQ_SUBSAMPLING_422);
            chroma_row = row;
        }

        dest_y  = frame->data[0] + frame->linesize[0] * row + x;
        dest_cb = frame->data[1] + frame->linesize[1] * chroma_row + x / 2;
        dest_cr = frame->data[2] + frame->linesize[2] * chroma_row + x / 2;
        if (has_alpha) {
            memset(last_alpha, 255, sizeof(last_alpha));
            dest_a = frame->data[3] + frame->linesize[3] * row + x;
        }

        /* Four luma blocks: top-left, top-right, bottom-left, bottom-right. */
        for (int blk = 0; blk < 4; blk++) {
            uint8_t *out = dest_y + (blk >> 1) * 8 * stride_y + (blk & 1) * 8;

            ret = decode_dct_block(s, gb, last_dc, 0, out, stride_y);
            if (ret < 0)
                return ret;
        }

        /* Top chroma blocks (the only ones for 4:2:0). */
        ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, stride_cb);
        if (ret < 0)
            return ret;
        ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, stride_cr);
        if (ret < 0)
            return ret;

        if (s->subsampling != SHQ_SUBSAMPLING_420) {
            /* Bottom chroma blocks. */
            ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * stride_cb, stride_cb);
            if (ret < 0)
                return ret;
            ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * stride_cr, stride_cr);
            if (ret < 0)
                return ret;
        }

        if (s->alpha_type == SHQ_RLE_ALPHA) {
            /* Alpha coded using 16x8 RLE blocks: top half, then bottom half. */
            ret = decode_alpha_block(s, gb, last_alpha, dest_a, stride_a);
            if (ret < 0)
                return ret;
            ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * stride_a, stride_a);
            if (ret < 0)
                return ret;
        } else if (s->alpha_type == SHQ_DCT_ALPHA) {
            /* Alpha encoded exactly like luma, using predictor slot 3. */
            for (int blk = 0; blk < 4; blk++) {
                uint8_t *out = dest_a + (blk >> 1) * 8 * stride_a + (blk & 1) * 8;

                ret = decode_dct_block(s, gb, last_dc, 3, out, stride_a);
                if (ret < 0)
                    return ret;
            }
        }
    }

    return 0;
}
|
|
|
/**
 * Decode one of the four slices of a field (or of a progressive frame).
 *
 * The field data in buf[start..end) consists of four slices, each led by
 * a 24-bit little-endian length that includes its own 3 bytes.  Slice k
 * covers macroblock rows k, k + 4, k + 8, ...  Slice 3 additionally
 * decodes the right-hand border column when the width is not a multiple
 * of 16 and the content is not 4:4:4.
 *
 * @param s            decoder context
 * @param buf          packet data
 * @param buf_size     packet size in bytes
 * @param frame        destination frame
 * @param field_number line offset added to every row (0 or 1)
 * @param start        offset of the first slice header of this field
 * @param end          offset one past the last byte of this field
 * @param line_stride  1 for progressive, 2 for interlaced content
 * @param slice_number slice to decode, 0..3
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride, int slice_number)
{
    const int has_alpha = s->alpha_type != SHQ_NO_ALPHA;
    const int stride_y  = frame->linesize[0] * line_stride;
    const int stride_cb = frame->linesize[1] * line_stride;
    const int stride_cr = frame->linesize[2] * line_stride;
    int stride_a;
    int slice_offsets[5];
    uint32_t slice_begin, slice_end;
    GetBitContext gb;
    int ret;

    if (has_alpha)
        stride_a = frame->linesize[3] * line_stride;

    if (end < start || end - start < 3 || end > buf_size)
        return AVERROR_INVALIDDATA;

    /* Walk the chain of slice lengths to find where each slice starts. */
    slice_offsets[0] = start;
    slice_offsets[4] = end;
    for (int k = 1; k < 4; k++) {
        const uint32_t prev = slice_offsets[k - 1];
        const uint32_t len  = AV_RL24(buf + prev);

        slice_offsets[k] = prev + len;

        /* Every slice must hold its header and leave room for the next one. */
        if (len < 3 || slice_offsets[k] > end - 3)
            return AVERROR_INVALIDDATA;
    }

    slice_begin = slice_offsets[slice_number];
    slice_end   = slice_offsets[slice_number + 1];

    /* Skip the 3-byte length header; the rest is the slice payload. */
    ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3);
    if (ret < 0)
        return ret;

    for (int y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
        const int row = y + field_number;
        int last_dc[4] = { 1024, 1024, 1024, 1024 };
        uint8_t last_alpha[16];
        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
        int chroma_row;

        memset(last_alpha, 255, sizeof(last_alpha));

        if (s->subsampling == SHQ_SUBSAMPLING_420)
            chroma_row = y / 2 + field_number;
        else
            chroma_row = row;

        dest_y  = frame->data[0] + frame->linesize[0] * row;
        dest_cb = frame->data[1] + frame->linesize[1] * chroma_row;
        dest_cr = frame->data[2] + frame->linesize[2] * chroma_row;
        if (has_alpha)
            dest_a = frame->data[3] + frame->linesize[3] * row;

        for (int x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) {
            /* Four luma blocks: top-left, top-right, bottom-left, bottom-right. */
            for (int blk = 0; blk < 4; blk++) {
                uint8_t *out = dest_y + (blk >> 1) * 8 * stride_y + (blk & 1) * 8;

                ret = decode_dct_block(s, &gb, last_dc, 0, out, stride_y);
                if (ret < 0)
                    return ret;
            }

            /*
             * First chroma block pair.  For 4:2:0 it is the only one, for
             * 4:2:2 it is the top block, for 4:4:4 the top-left block.
             */
            ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, stride_cb);
            if (ret < 0)
                return ret;
            ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, stride_cr);
            if (ret < 0)
                return ret;

            if (s->subsampling != SHQ_SUBSAMPLING_420) {
                /* 4:2:2: bottom block; 4:4:4: bottom-left block. */
                ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * stride_cb, stride_cb);
                if (ret < 0)
                    return ret;
                ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * stride_cr, stride_cr);
                if (ret < 0)
                    return ret;

                if (s->subsampling == SHQ_SUBSAMPLING_444) {
                    /* Top-right pair, then bottom-right pair. */
                    ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, stride_cb);
                    if (ret < 0)
                        return ret;
                    ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, stride_cr);
                    if (ret < 0)
                        return ret;
                    ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * stride_cb + 8, stride_cb);
                    if (ret < 0)
                        return ret;
                    ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * stride_cr + 8, stride_cr);
                    if (ret < 0)
                        return ret;

                    /* 4:4:4 chroma is 16 wide: add the extra 8 here. */
                    dest_cb += 8;
                    dest_cr += 8;
                }
            }

            /* Step to the next macroblock. */
            dest_y  += 16;
            dest_cb += 8;
            dest_cr += 8;

            if (s->alpha_type == SHQ_RLE_ALPHA) {
                /* Alpha coded using 16x8 RLE blocks: top half, then bottom half. */
                ret = decode_alpha_block(s, &gb, last_alpha, dest_a, stride_a);
                if (ret < 0)
                    return ret;
                ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * stride_a, stride_a);
                if (ret < 0)
                    return ret;
                dest_a += 16;
            } else if (s->alpha_type == SHQ_DCT_ALPHA) {
                /* Alpha encoded exactly like luma, using predictor slot 3. */
                for (int blk = 0; blk < 4; blk++) {
                    uint8_t *out = dest_a + (blk >> 1) * 8 * stride_a + (blk & 1) * 8;

                    ret = decode_dct_block(s, &gb, last_dc, 3, out, stride_a);
                    if (ret < 0)
                        return ret;
                }
                dest_a += 16;
            }
        }
    }

    /* The leftover half-macroblock column is read from the last slice. */
    if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15) && slice_number == 3)
        return decode_speedhq_border(s, &gb, frame, field_number, line_stride);

    return 0;
}
|
|
|
/**
 * execute2() job for single-field (progressive) packets.
 *
 * Decodes slice number jobnr of the field that starts at byte 4 and runs
 * to the end of the packet, with a line stride of 1.
 *
 * @param avctx    codec context
 * @param arg      destination AVFrame
 * @param jobnr    slice number
 * @param threadnr unused
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_slice_progressive(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
{
    SHQContext *s       = avctx->priv_data;
    const AVPacket *pkt = s->avpkt;

    (void)threadnr;

    return decode_speedhq_field(s, pkt->data, pkt->size, arg,
                                0, 4, pkt->size, 1, jobnr);
}
|
|
|
/**
 * execute2() job for two-field (interlaced) packets.
 *
 * jobnr / 4 selects the field and jobnr % 4 the slice within it.  The
 * first field spans bytes [4, second_field_offset), the second field
 * spans [second_field_offset, packet size).  Both use a line stride of 2.
 *
 * @param avctx    codec context
 * @param arg      destination AVFrame
 * @param jobnr    job index
 * @param threadnr unused
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_slice_interlaced(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
{
    SHQContext *s       = avctx->priv_data;
    const AVPacket *pkt = s->avpkt;
    const int is_second = jobnr / 4 != 0;
    const int slice     = jobnr % 4;
    int start, end;

    (void)threadnr;

    if (is_second) {
        start = s->second_field_offset;
        end   = pkt->size;
    } else {
        start = 4;
        end   = s->second_field_offset;
    }

    return decode_speedhq_field(s, pkt->data, pkt->size, arg,
                                is_second, start, end, 2, slice);
}
|
|
|
static void compute_quant_matrix(int *output, int qscale) |
|
{ |
|
int i; |
|
for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale; |
|
} |
|
|
|
/**
 * Decode one SpeedHQ packet into a frame.
 *
 * Packet header: byte 0 is the quality (must be below 100), bytes 1..3
 * are the 24-bit little-endian offset of the second field.
 *
 * @param avctx     codec context
 * @param frame     output frame; its buffer is allocated here
 * @param got_frame set to 1 when a frame was produced
 * @param avpkt     input packet
 * @return the number of bytes consumed on success (the packet size, also
 *         when the frame is skipped), a negative AVERROR code on failure
 */
static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                                int *got_frame, AVPacket *avpkt)
{
    SHQContext *const s       = avctx->priv_data;
    const uint8_t *const data = avpkt->data;
    const int size            = avpkt->size;
    int (*job)(AVCodecContext *, void *, int, int);
    int nb_jobs, ret;
    uint8_t quality;

    if (size < 4 || avctx->width < 8 || avctx->width % 8 != 0)
        return AVERROR_INVALIDDATA;

    /* Reject packets that are far too small for the picture dimensions. */
    if (size < avctx->width * avctx->height / 64 / 4)
        return AVERROR_INVALIDDATA;

    quality = data[0];
    if (quality >= 100)
        return AVERROR_INVALIDDATA;

    if (avctx->skip_frame >= AVDISCARD_ALL)
        return avpkt->size;

    compute_quant_matrix(s->quant_matrix, 100 - quality);

    s->second_field_offset = AV_RL24(data + 1);
    if (s->second_field_offset >= size - 3)
        return AVERROR_INVALIDDATA;

    avctx->coded_width  = FFALIGN(avctx->width, 16);
    avctx->coded_height = FFALIGN(avctx->height, 16);

    ret = ff_thread_get_buffer(avctx, frame, 0);
    if (ret < 0)
        return ret;

    /* The slice jobs fetch the packet through the context. */
    s->avpkt = avpkt;

    if (s->second_field_offset == 4 || s->second_field_offset == (size - 4)) {
        /*
         * Overlapping first and second fields is used to signal
         * encoding only a single field. In this case, "height"
         * is ambiguous; it could mean either the height of the
         * frame as a whole, or of the field. The former would make
         * more sense for compatibility with legacy decoders,
         * but this matches the convention used in NDI, which is
         * the primary user of this trick.
         */
        job     = decode_slice_progressive;
        nb_jobs = 4;
    } else {
        /* Two fields with four slices each. */
        job     = decode_slice_interlaced;
        nb_jobs = 8;
    }

    ret = avctx->execute2(avctx, job, frame, NULL, nb_jobs);
    if (ret < 0)
        return ret;

    *got_frame = 1;
    return size;
}
|
|
|
/* |
|
* Alpha VLC. Run and level are independently coded, and would be |
|
* outside the default limits for MAX_RUN/MAX_LEVEL, so we don't |
|
* bother with combining them into one table. |
|
*/ |
|
static av_cold void compute_alpha_vlcs(void) |
|
{ |
|
uint16_t run_code[134], level_code[266]; |
|
uint8_t run_bits[134], level_bits[266]; |
|
int16_t run_symbols[134], level_symbols[266]; |
|
int entry, i, sign; |
|
|
|
/* Initialize VLC for alpha run. */ |
|
entry = 0; |
|
|
|
/* 0 -> 0. */ |
|
run_code[entry] = 0; |
|
run_bits[entry] = 1; |
|
run_symbols[entry] = 0; |
|
++entry; |
|
|
|
/* 10xx -> xx plus 1. */ |
|
for (i = 0; i < 4; ++i) { |
|
run_code[entry] = (i << 2) | 1; |
|
run_bits[entry] = 4; |
|
run_symbols[entry] = i + 1; |
|
++entry; |
|
} |
|
|
|
/* 111xxxxxxx -> xxxxxxx. */ |
|
for (i = 0; i < 128; ++i) { |
|
run_code[entry] = (i << 3) | 7; |
|
run_bits[entry] = 10; |
|
run_symbols[entry] = i; |
|
++entry; |
|
} |
|
|
|
/* 110 -> EOB. */ |
|
run_code[entry] = 3; |
|
run_bits[entry] = 3; |
|
run_symbols[entry] = -1; |
|
++entry; |
|
|
|
av_assert0(entry == FF_ARRAY_ELEMS(run_code)); |
|
|
|
VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_run_vlc_le, ALPHA_VLC_BITS, |
|
FF_ARRAY_ELEMS(run_code), |
|
run_bits, 1, 1, |
|
run_code, 2, 2, |
|
run_symbols, 2, 2, VLC_INIT_LE); |
|
|
|
/* Initialize VLC for alpha level. */ |
|
entry = 0; |
|
|
|
for (sign = 0; sign <= 1; ++sign) { |
|
/* 1s -> -1 or +1 (depending on sign bit). */ |
|
level_code[entry] = (sign << 1) | 1; |
|
level_bits[entry] = 2; |
|
level_symbols[entry] = sign ? -1 : 1; |
|
++entry; |
|
|
|
/* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */ |
|
for (i = 0; i < 4; ++i) { |
|
level_code[entry] = (i << 3) | (sign << 2) | 2; |
|
level_bits[entry] = 5; |
|
level_symbols[entry] = sign ? -(i + 2) : (i + 2); |
|
++entry; |
|
} |
|
} |
|
|
|
/* |
|
* 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes |
|
* here that would better be encoded in other ways (e.g. 0 would be |
|
* encoded by increasing run, and +/- 1 would be encoded with a |
|
* shorter code), but it doesn't hurt to allow everything. |
|
*/ |
|
for (i = 0; i < 256; ++i) { |
|
level_code[entry] = i << 2; |
|
level_bits[entry] = 10; |
|
level_symbols[entry] = i; |
|
++entry; |
|
} |
|
|
|
av_assert0(entry == FF_ARRAY_ELEMS(level_code)); |
|
|
|
VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_level_vlc_le, ALPHA_VLC_BITS, |
|
FF_ARRAY_ELEMS(level_code), |
|
level_bits, 1, 1, |
|
level_code, 2, 2, |
|
level_symbols, 2, 2, VLC_INIT_LE); |
|
} |
|
|
|
static av_cold void speedhq_static_init(void) |
|
{ |
|
/* Exactly the same as MPEG-2, except for a little-endian reader. */ |
|
VLC_INIT_STATIC_TABLE(dc_lum_vlc_le, DC_VLC_BITS, 12, |
|
ff_mpeg12_vlc_dc_lum_bits, 1, 1, |
|
ff_mpeg12_vlc_dc_lum_code, 2, 2, |
|
VLC_INIT_OUTPUT_LE); |
|
VLC_INIT_STATIC_TABLE(dc_chroma_vlc_le, DC_VLC_BITS, 12, |
|
ff_mpeg12_vlc_dc_chroma_bits, 1, 1, |
|
ff_mpeg12_vlc_dc_chroma_code, 2, 2, |
|
VLC_INIT_OUTPUT_LE); |
|
|
|
ff_init_2d_vlc_rl(ff_speedhq_vlc_table, speedhq_rl_vlc, ff_speedhq_run, |
|
ff_speedhq_level, SPEEDHQ_RL_NB_ELEMS, |
|
FF_ARRAY_ELEMS(speedhq_rl_vlc), VLC_INIT_LE); |
|
|
|
compute_alpha_vlcs(); |
|
} |
|
|
|
/**
 * Initialise a decoder instance.
 *
 * Builds the shared static tables (once per process), sets up the DSP
 * helpers and the permuted scan table, and derives chroma subsampling,
 * alpha coding and pixel format from the codec tag (FOURCC).
 *
 * @param avctx codec context
 * @return 0 on success, AVERROR_UNKNOWN if the one-time initialisation
 *         fails, AVERROR_INVALIDDATA for an unknown FOURCC
 */
static av_cold int speedhq_decode_init(AVCodecContext *avctx)
{
    /* One row per supported FOURCC. */
    static const struct {
        uint32_t tag;
        int subsampling;
        int alpha_type;
        enum AVPixelFormat pix_fmt;
    } variants[] = {
        { MKTAG('S', 'H', 'Q', '0'), SHQ_SUBSAMPLING_420, SHQ_NO_ALPHA,  AV_PIX_FMT_YUV420P  },
        { MKTAG('S', 'H', 'Q', '1'), SHQ_SUBSAMPLING_420, SHQ_RLE_ALPHA, AV_PIX_FMT_YUVA420P },
        { MKTAG('S', 'H', 'Q', '2'), SHQ_SUBSAMPLING_422, SHQ_NO_ALPHA,  AV_PIX_FMT_YUV422P  },
        { MKTAG('S', 'H', 'Q', '3'), SHQ_SUBSAMPLING_422, SHQ_RLE_ALPHA, AV_PIX_FMT_YUVA422P },
        { MKTAG('S', 'H', 'Q', '4'), SHQ_SUBSAMPLING_444, SHQ_NO_ALPHA,  AV_PIX_FMT_YUV444P  },
        { MKTAG('S', 'H', 'Q', '5'), SHQ_SUBSAMPLING_444, SHQ_RLE_ALPHA, AV_PIX_FMT_YUVA444P },
        { MKTAG('S', 'H', 'Q', '7'), SHQ_SUBSAMPLING_422, SHQ_DCT_ALPHA, AV_PIX_FMT_YUVA422P },
        { MKTAG('S', 'H', 'Q', '9'), SHQ_SUBSAMPLING_444, SHQ_DCT_ALPHA, AV_PIX_FMT_YUVA444P },
    };
    static AVOnce init_once = AV_ONCE_INIT;
    SHQContext *const s = avctx->priv_data;
    unsigned idx;

    if (ff_thread_once(&init_once, speedhq_static_init))
        return AVERROR_UNKNOWN;

    ff_blockdsp_init(&s->bdsp);
    ff_idctdsp_init(&s->idsp, avctx);
    ff_permute_scantable(s->permutated_intra_scantable, ff_zigzag_direct,
                         s->idsp.idct_permutation);

    for (idx = 0; idx < FF_ARRAY_ELEMS(variants); idx++) {
        if (variants[idx].tag == avctx->codec_tag)
            break;
    }
    if (idx == FF_ARRAY_ELEMS(variants)) {
        av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
               avctx->codec_tag);
        return AVERROR_INVALIDDATA;
    }

    s->subsampling = variants[idx].subsampling;
    s->alpha_type  = variants[idx].alpha_type;
    avctx->pix_fmt = variants[idx].pix_fmt;

    /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
    avctx->colorspace             = AVCOL_SPC_BT470BG;
    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;

    return 0;
}
|
|
|
const FFCodec ff_speedhq_decoder = { |
|
.p.name = "speedhq", |
|
CODEC_LONG_NAME("NewTek SpeedHQ"), |
|
.p.type = AVMEDIA_TYPE_VIDEO, |
|
.p.id = AV_CODEC_ID_SPEEDHQ, |
|
.priv_data_size = sizeof(SHQContext), |
|
.init = speedhq_decode_init, |
|
FF_CODEC_DECODE_CB(speedhq_decode_frame), |
|
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, |
|
};
|
|
|