From b33fa8a1cdbe92824b833eb1ee3ec360f8d8b00f Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Wed, 9 Mar 2011 22:09:00 -0800 Subject: [PATCH] CrystalHD decoder support v7 The Broadcom CrystalHD decoder chips provide hardware video decoding for a number of video formats. It does so using a memory:memory interface where a compressed bitstream is fed in and decompressed pictures are copied out. As such, it works independent of any graphics hardware in the system. Features supported in this initial version: * Support for Linux (using current drivers/library from git.wilsonet.com) * Support for 70015 hardware * Formats: MPEG2, MPEG4 Part 2, H.264, VC1 and DivX 3.11 (untested) * Progressive content * Non-H.264 Interlaced content * H.264 MBAFF content Features missing in this initial version: * Support for OSX (might work - untested) * Support for Windows * Support for 70012 hardware * H.264 PAFF content Signed-off-by: Philip Langdale Signed-off-by: Michael Niedermayer --- MAINTAINERS | 1 + configure | 10 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 6 + libavcodec/crystalhd.c | 960 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 978 insertions(+) create mode 100644 libavcodec/crystalhd.c diff --git a/MAINTAINERS b/MAINTAINERS index 884f7c32c1..42b39d877a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -231,6 +231,7 @@ Codecs: zmbv* Kostya Shishkov Hardware acceleration: + crystalhd.c Philip Langdale dxva2* Laurent Aimar vaapi* Gwenole Beauchesne vdpau* Carl Eugen Hoyos diff --git a/configure b/configure index 0f37fc23fc..ca76a2708b 100755 --- a/configure +++ b/configure @@ -878,6 +878,7 @@ CONFIG_LIST=" avformat avisynth bzlib + crystalhd dct doc dwt @@ -1244,6 +1245,7 @@ h263_vaapi_hwaccel_select="vaapi h263_decoder" h263i_decoder_select="h263_decoder" h263p_encoder_select="h263_encoder" h264_decoder_select="golomb h264dsp h264pred" +h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf" h264_dxva2_hwaccel_deps="dxva2api_h" h264_dxva2_hwaccel_select="dxva2 h264_decoder" h264_vaapi_hwaccel_select="vaapi" @@ -1266,13 +1268,16 @@ mpeg4_decoder_select="h263_decoder mpeg4video_parser" mpeg4_encoder_select="h263_encoder" mpeg_vdpau_decoder_select="vdpau mpegvideo_decoder" mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder" +mpeg2_crystalhd_decoder_select="crystalhd" mpeg2_dxva2_hwaccel_deps="dxva2api_h" mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder" mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder" +mpeg4_crystalhd_decoder_select="crystalhd" mpeg4_vaapi_hwaccel_select="vaapi mpeg4_decoder" mpeg4_vdpau_decoder_select="vdpau mpeg4_decoder" mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h" mpeg_xvmc_decoder_select="mpegvideo_decoder" +msmpeg4_crystalhd_decoder_select="crystalhd" msmpeg4v1_decoder_select="h263_decoder" msmpeg4v1_encoder_select="h263_encoder" msmpeg4v2_decoder_select="h263_decoder" @@ -1309,6 +1314,7 @@ truehd_decoder_select="mlp_decoder" tscc_decoder_select="zlib" twinvq_decoder_select="mdct lsp" vc1_decoder_select="h263_decoder" +vc1_crystalhd_decoder_select="crystalhd" vc1_dxva2_hwaccel_deps="dxva2api_h DXVA_PictureParameters_wDecodedPictureIndex" vc1_dxva2_hwaccel_select="dxva2 vc1_decoder" vc1_vaapi_hwaccel_select="vaapi vc1_decoder" @@ -1330,6 +1336,7 @@ wmv1_encoder_select="h263_encoder" wmv2_decoder_select="h263_decoder" wmv2_encoder_select="h263_encoder" wmv3_decoder_select="vc1_decoder" +wmv3_crystalhd_decoder_select="crystalhd" wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel" wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel" wmv3_vdpau_decoder_select="vc1_vdpau_decoder" @@ -1338,6 +1345,7 @@ zlib_encoder_select="zlib" zmbv_decoder_select="zlib" zmbv_encoder_select="zlib" +crystalhd_deps="libcrystalhd_libcrystalhd_if_h" vaapi_deps="va_va_h" vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h" @@ -2760,6 +2768,7 @@ check_func_headers windows.h VirtualAlloc check_header conio.h check_header dlfcn.h check_header dxva2api.h +check_header libcrystalhd/libcrystalhd_if.h check_header malloc.h check_header poll.h check_header sys/mman.h @@ -2808,6 +2817,7 @@ for thread in $THREADS_LIST; do done check_lib math.h sin -lm +disabled crystalhd || check_lib libcrystalhd/libcrystalhd_if.h DtsGetVersion -lcrystalhd disabled vaapi || check_lib va/va.h vaInitialize -lva check_mathfunc exp2 diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 930affbb7a..3a99fcf4c1 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -26,6 +26,7 @@ OBJS = allcodecs.o \ # parts needed for many different codecs OBJS-$(CONFIG_AANDCT) += aandcttab.o +OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o OBJS-$(CONFIG_DCT) += dct.o OBJS-$(CONFIG_DWT) += dwt.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 108a3ab7c8..2fbe39552c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -116,6 +116,7 @@ void avcodec_register_all(void) REGISTER_DECODER (H263I, h263i); REGISTER_ENCODER (H263P, h263p); REGISTER_DECODER (H264, h264); + REGISTER_DECODER (H264_CRYSTALHD, h264_crystalhd); REGISTER_DECODER (H264_VDPAU, h264_vdpau); REGISTER_ENCDEC (HUFFYUV, huffyuv); REGISTER_DECODER (IDCIN, idcin); @@ -141,10 +142,13 @@ void avcodec_register_all(void) REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video); REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video); REGISTER_ENCDEC (MPEG4, mpeg4); + REGISTER_DECODER (MPEG4_CRYSTALHD, mpeg4_crystalhd); REGISTER_DECODER (MPEG4_VDPAU, mpeg4_vdpau); REGISTER_DECODER (MPEGVIDEO, mpegvideo); REGISTER_DECODER (MPEG_VDPAU, mpeg_vdpau); REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau); + REGISTER_DECODER (MPEG2_CRYSTALHD, mpeg2_crystalhd); + REGISTER_DECODER (MSMPEG4_CRYSTALHD, msmpeg4_crystalhd); REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1); REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2); REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); @@ -197,6 +201,7 @@ void avcodec_register_all(void) REGISTER_DECODER (V210X, v210x); REGISTER_DECODER (VB, vb); REGISTER_DECODER (VC1, vc1); + REGISTER_DECODER (VC1_CRYSTALHD, vc1_crystalhd); REGISTER_DECODER (VC1_VDPAU, vc1_vdpau); REGISTER_DECODER (VCR1, vcr1); REGISTER_DECODER (VMDVIDEO, vmdvideo); @@ -211,6 +216,7 @@ void avcodec_register_all(void) REGISTER_ENCDEC (WMV1, wmv1); REGISTER_ENCDEC (WMV2, wmv2); REGISTER_DECODER (WMV3, wmv3); + REGISTER_DECODER (WMV3_CRYSTALHD, wmv3_crystalhd); REGISTER_DECODER (WMV3_VDPAU, wmv3_vdpau); REGISTER_DECODER (WNV1, wnv1); REGISTER_DECODER (XAN_WC3, xan_wc3); diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c new file mode 100644 index 0000000000..209a86d62b --- /dev/null +++ b/libavcodec/crystalhd.c @@ -0,0 +1,960 @@ +/* + * - CrystalHD decoder module - + * + * Copyright(C) 2010 Philip Langdale + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * - Principles of Operation - + * + * The CrystalHD decoder operates at the bitstream level - which is an even + * higher level than the decoding hardware you typically see in modern GPUs. + * This means it has a very simple interface, in principle. You feed demuxed + * packets in one end and get decoded picture (fields/frames) out the other. + * + * Of course, nothing is ever that simple. Due, at the very least, to b-frame + * dependencies in the supported formats, the hardware has a delay between + * when a packet goes in, and when a picture comes out. Furthermore, this delay + * is not just a function of time, but also one of the dependency on additional + * frames being fed into the decoder to satisfy the b-frame dependencies. + * + * As such, a pipeline will build up that is roughly equivalent to the required + * DPB for the file being played. If that was all it took, things would still + * be simple - so, of course, it isn't. + * + * The hardware has a way of indicating that a picture is ready to be copied out, + * but this is unreliable - and sometimes the attempt will still fail so, based + * on testing, the code will wait until 3 pictures are ready before starting + * to copy out - and this has the effect of extending the pipeline. + * + * Finally, while it is tempting to say that once the decoder starts outputing + * frames, the software should never fail to return a frame from a decode(), + * this is a hard assertion to make, because the stream may switch between + * differently encoded content (number of b-frames, interlacing, etc) which + * might require a longer pipeline than before. If that happened, you could + * deadlock trying to retrieve a frame that can't be decoded without feeding + * in additional packets. + * + * As such, the code will return in the event that a picture cannot be copied + * out, leading to an increase in the length of the pipeline. This in turn, + * means we have to be sensitive to the time it takes to decode a picture; + * We do not want to give up just because the hardware needed a little more + * time to prepare the picture! For this reason, there are delays included + * in the decode() path that ensure that, under normal conditions, the hardware + * will only fail to return a frame if it really needs additional packets to + * complete the decoding. + * + * Finally, to be explicit, we do not want the pipeline to grow without bound + * for two reasons: 1) The hardware can only buffer a finite number of packets, + * and 2) The client application may not be able to cope with arbitrarily long + * delays in the video path relative to the audio path. For example. MPlayer + * can only handle a 20 picture delay (although this is arbitrary, and needs + * to be extended to fully support the CrystalHD where the delay could be up + * to 32 pictures - consider PAFF H.264 content with 16 b-frames). + */ + +/***************************************************************************** + * Includes + ****************************************************************************/ + +#define _XOPEN_SOURCE 600 +#include +#include +#include +#include + +#include +#include +#include + +#include "avcodec.h" +#include "libavutil/imgutils.h" +#include "libavutil/intreadwrite.h" + +/* Timeout parameter passed to DtsProcOutput() in us */ +#define OUTPUT_PROC_TIMEOUT 50 +/* Step between fake timestamps passed to hardware in units of 100ns */ +#define TIMESTAMP_UNIT 100000 +/* Initial value in us of the wait in decode() */ +#define BASE_WAIT 10000 +/* Increment in us to adjust wait in decode() */ +#define WAIT_UNIT 1000 + + +/***************************************************************************** + * Module private data + ****************************************************************************/ + +typedef enum { + RET_ERROR = -1, + RET_OK = 0, + RET_COPY_AGAIN = 1, + RET_SKIP_NEXT_COPY = 2, +} CopyRet; + +typedef struct OpaqueList { + struct OpaqueList *next; + uint64_t fake_timestamp; + uint64_t reordered_opaque; +} OpaqueList; + +typedef struct { + AVCodecContext *avctx; + AVFrame pic; + HANDLE dev; + + uint8_t is_70012; + uint8_t *sps_pps_buf; + uint32_t sps_pps_size; + uint8_t is_nal; + uint8_t output_ready; + uint8_t need_second_field; + uint8_t skip_next_output; + uint64_t decode_wait; + + uint64_t last_picture; + + OpaqueList *head; + OpaqueList *tail; +} CHDContext; + + +/***************************************************************************** + * Helper functions + ****************************************************************************/ + +static inline BC_MEDIA_SUBTYPE id2subtype(CHDContext *priv, enum CodecID id) +{ + switch (id) { + case CODEC_ID_MPEG4: + return BC_MSUBTYPE_DIVX; + case CODEC_ID_MSMPEG4V3: + return BC_MSUBTYPE_DIVX311; + case CODEC_ID_MPEG2VIDEO: + return BC_MSUBTYPE_MPEG2VIDEO; + case CODEC_ID_VC1: + return BC_MSUBTYPE_VC1; + case CODEC_ID_WMV3: + return BC_MSUBTYPE_WMV3; + case CODEC_ID_H264: + return priv->is_nal ? BC_MSUBTYPE_AVC1 : BC_MSUBTYPE_H264; + default: + return BC_MSUBTYPE_INVALID; + } +} + +static inline void print_frame_info(CHDContext *priv, BC_DTS_PROC_OUT *output) +{ + av_log(priv->avctx, AV_LOG_VERBOSE, "\tYBuffSz: %u\n", output->YbuffSz); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tYBuffDoneSz: %u\n", + output->YBuffDoneSz); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tUVBuffDoneSz: %u\n", + output->UVBuffDoneSz); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tTimestamp: %"PRIu64"\n", + output->PicInfo.timeStamp); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tPicture Number: %u\n", + output->PicInfo.picture_number); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tWidth: %u\n", + output->PicInfo.width); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tHeight: %u\n", + output->PicInfo.height); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tChroma: 0x%03x\n", + output->PicInfo.chroma_format); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tPulldown: %u\n", + output->PicInfo.pulldown); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tFlags: 0x%08x\n", + output->PicInfo.flags); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tFrame Rate/Res: %u\n", + output->PicInfo.frame_rate); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tAspect Ratio: %u\n", + output->PicInfo.aspect_ratio); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tColor Primaries: %u\n", + output->PicInfo.colour_primaries); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tMetaData: %u\n", + output->PicInfo.picture_meta_payload); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tSession Number: %u\n", + output->PicInfo.sess_num); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tycom: %u\n", + output->PicInfo.ycom); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tCustom Aspect: %u\n", + output->PicInfo.custom_aspect_ratio_width_height); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tFrames to Drop: %u\n", + output->PicInfo.n_drop); + av_log(priv->avctx, AV_LOG_VERBOSE, "\tH264 Valid Fields: 0x%08x\n", + output->PicInfo.other.h264.valid); +} + + +/***************************************************************************** + * OpaqueList functions + ****************************************************************************/ + +static uint64_t opaque_list_push(CHDContext *priv, uint64_t reordered_opaque) +{ + OpaqueList *newNode = av_mallocz(sizeof (OpaqueList)); + if (!newNode) { + av_log(priv->avctx, AV_LOG_ERROR, + "Unable to allocate new node in OpaqueList.\n"); + return 0; + } + if (!priv->head) { + newNode->fake_timestamp = TIMESTAMP_UNIT; + priv->head = newNode; + } else { + newNode->fake_timestamp = priv->tail->fake_timestamp + TIMESTAMP_UNIT; + priv->tail->next = newNode; + } + priv->tail = newNode; + newNode->reordered_opaque = reordered_opaque; + + return newNode->fake_timestamp; +} + +/* + * The OpaqueList is built in decode order, while elements will be removed + * in presentation order. If frames are reordered, this means we must be + * able to remove elements that are not the first element. + */ +static uint64_t opaque_list_pop(CHDContext *priv, uint64_t fake_timestamp) +{ + OpaqueList *node = priv->head; + + if (!priv->head) { + av_log(priv->avctx, AV_LOG_ERROR, + "CrystalHD: Attempted to query non-existent timestamps.\n"); + return AV_NOPTS_VALUE; + } + + /* + * The first element is special-cased because we have to manipulate + * the head pointer rather than the previous element in the list. + */ + if (priv->head->fake_timestamp == fake_timestamp) { + uint64_t reordered_opaque = node->reordered_opaque; + priv->head = node->next; + av_free(node); + + if (!priv->head->next) + priv->tail = priv->head; + + return reordered_opaque; + } + + /* + * The list is processed at arm's length so that we have the + * previous element available to rewrite its next pointer. + */ + while (node->next) { + OpaqueList *next = node->next; + if (next->fake_timestamp == fake_timestamp) { + uint64_t reordered_opaque = next->reordered_opaque; + node->next = next->next; + av_free(next); + + if (!node->next) + priv->tail = node; + + return reordered_opaque; + } else { + node = next; + } + } + + av_log(priv->avctx, AV_LOG_VERBOSE, + "CrystalHD: Couldn't match fake_timestamp.\n"); + return AV_NOPTS_VALUE; +} + + +/***************************************************************************** + * Video decoder API function definitions + ****************************************************************************/ + +static void flush(AVCodecContext *avctx) +{ + CHDContext *priv = avctx->priv_data; + + avctx->has_b_frames = 0; + priv->last_picture = -1; + priv->output_ready = 0; + priv->need_second_field = 0; + priv->skip_next_output = 0; + priv->decode_wait = BASE_WAIT; + + if (priv->pic.data[0]) + avctx->release_buffer(avctx, &priv->pic); + + /* Flush mode 4 flushes all software and hardware buffers. */ + DtsFlushInput(priv->dev, 4); +} + + +static av_cold int uninit(AVCodecContext *avctx) +{ + CHDContext *priv = avctx->priv_data; + HANDLE device; + + device = priv->dev; + DtsStopDecoder(device); + DtsCloseDecoder(device); + DtsDeviceClose(device); + + av_free(priv->sps_pps_buf); + + if (priv->pic.data[0]) + avctx->release_buffer(avctx, &priv->pic); + + if (priv->head) { + OpaqueList *node = priv->head; + while (node) { + OpaqueList *next = node->next; + av_free(node); + node = next; + } + } + + return 0; +} + + +static av_cold int init(AVCodecContext *avctx) +{ + CHDContext* priv; + BC_STATUS ret; + BC_INFO_CRYSTAL version; + BC_INPUT_FORMAT format = { + .FGTEnable = FALSE, + .Progressive = TRUE, + .OptFlags = 0x80000000 | vdecFrameRate59_94 | 0x40, + .width = avctx->width, + .height = avctx->height, + }; + + BC_MEDIA_SUBTYPE subtype; + + uint32_t mode = DTS_PLAYBACK_MODE | + DTS_LOAD_FILE_PLAY_FW | + DTS_SKIP_TX_CHK_CPB | + DTS_PLAYBACK_DROP_RPT_MODE | + DTS_SINGLE_THREADED_MODE | + DTS_DFLT_RESOLUTION(vdecRESOLUTION_1080p23_976); + + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD Init for %s\n", + avctx->codec->name); + + avctx->pix_fmt = PIX_FMT_YUYV422; + + /* Initialize the library */ + priv = avctx->priv_data; + priv->avctx = avctx; + priv->is_nal = avctx->extradata_size > 0 && *(avctx->extradata) == 1; + priv->last_picture = -1; + priv->decode_wait = BASE_WAIT; + + subtype = id2subtype(priv, avctx->codec->id); + switch (subtype) { + case BC_MSUBTYPE_AVC1: + { + uint8_t *dummy_p; + int dummy_int; + AVBitStreamFilterContext *bsfc; + + uint32_t orig_data_size = avctx->extradata_size; + uint8_t *orig_data = av_malloc(orig_data_size); + if (!orig_data) { + av_log(avctx, AV_LOG_ERROR, + "Failed to allocate copy of extradata\n"); + return AVERROR(ENOMEM); + } + memcpy(orig_data, avctx->extradata, orig_data_size); + + + bsfc = av_bitstream_filter_init("h264_mp4toannexb"); + if (!bsfc) { + av_log(avctx, AV_LOG_ERROR, + "Cannot open the h264_mp4toannexb BSF!\n"); + av_free(orig_data); + return AVERROR_BSF_NOT_FOUND; + } + av_bitstream_filter_filter(bsfc, avctx, NULL, &dummy_p, + &dummy_int, NULL, 0, 0); + av_bitstream_filter_close(bsfc); + + priv->sps_pps_buf = avctx->extradata; + priv->sps_pps_size = avctx->extradata_size; + avctx->extradata = orig_data; + avctx->extradata_size = orig_data_size; + + format.pMetaData = priv->sps_pps_buf; + format.metaDataSz = priv->sps_pps_size; + format.startCodeSz = (avctx->extradata[4] & 0x03) + 1; + } + break; + case BC_MSUBTYPE_H264: + format.startCodeSz = 4; + // Fall-through + case BC_MSUBTYPE_VC1: + case BC_MSUBTYPE_WVC1: + case BC_MSUBTYPE_WMV3: + case BC_MSUBTYPE_WMVA: + case BC_MSUBTYPE_MPEG2VIDEO: + case BC_MSUBTYPE_DIVX: + case BC_MSUBTYPE_DIVX311: + format.pMetaData = avctx->extradata; + format.metaDataSz = avctx->extradata_size; + break; + default: + av_log(avctx, AV_LOG_ERROR, "CrystalHD: Unknown codec name\n"); + return AVERROR(EINVAL); + } + format.mSubtype = subtype; + + /* Get a decoder instance */ + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: starting up\n"); + // Initialize the Link and Decoder devices + ret = DtsDeviceOpen(&priv->dev, mode); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: DtsDeviceOpen failed\n"); + goto fail; + } + + ret = DtsCrystalHDVersion(priv->dev, &version); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_VERBOSE, + "CrystalHD: DtsCrystalHDVersion failed\n"); + goto fail; + } + priv->is_70012 = version.device == 0; + + if (priv->is_70012 && + (subtype == BC_MSUBTYPE_DIVX || subtype == BC_MSUBTYPE_DIVX311)) { + av_log(avctx, AV_LOG_VERBOSE, + "CrystalHD: BCM70012 doesn't support MPEG4-ASP/DivX/Xvid\n"); + goto fail; + } + + ret = DtsSetInputFormat(priv->dev, &format); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: SetInputFormat failed\n"); + goto fail; + } + + ret = DtsOpenDecoder(priv->dev, BC_STREAM_TYPE_ES); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsOpenDecoder failed\n"); + goto fail; + } + + ret = DtsSetColorSpace(priv->dev, OUTPUT_MODE422_YUY2); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsSetColorSpace failed\n"); + goto fail; + } + ret = DtsStartDecoder(priv->dev); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsStartDecoder failed\n"); + goto fail; + } + ret = DtsStartCapture(priv->dev); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsStartCapture failed\n"); + goto fail; + } + + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Init complete.\n"); + + return 0; + + fail: + uninit(avctx); + return -1; +} + + +/* + * The CrystalHD doesn't report interlaced H.264 content in a way that allows + * us to distinguish between specific cases that require different handling. + * So, for now, we have to hard-code the behaviour we want. + * + * The default behaviour is to assume MBAFF with input and output fieldpairs. + * + * Define ASSUME_PAFF_OVER_MBAFF to treat input as PAFF with separate input + * and output fields. + * + * Define ASSUME_TWO_INPUTS_ONE_OUTPUT to treat input as separate fields but + * output as a single fieldpair. + * + * Define both to mess up your playback. + */ +#define ASSUME_PAFF_OVER_MBAFF 0 +#define ASSUME_TWO_INPUTS_ONE_OUTPUT 0 +static inline CopyRet copy_frame(AVCodecContext *avctx, + BC_DTS_PROC_OUT *output, + void *data, int *data_size, + uint8_t second_field) +{ + BC_STATUS ret; + BC_DTS_STATUS decoder_status; + uint8_t is_paff; + uint8_t next_frame_same; + uint8_t interlaced; + + CHDContext *priv = avctx->priv_data; + + uint8_t bottom_field = (output->PicInfo.flags & VDEC_FLAG_BOTTOMFIELD) == + VDEC_FLAG_BOTTOMFIELD; + uint8_t bottom_first = !!(output->PicInfo.flags & VDEC_FLAG_BOTTOM_FIRST); + + int width = output->PicInfo.width; + int height = output->PicInfo.height; + int bwidth; + uint8_t *src = output->Ybuff; + int sStride; + uint8_t *dst; + int dStride; + + ret = DtsGetDriverStatus(priv->dev, &decoder_status); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, + "CrystalHD: GetDriverStatus failed: %u\n", ret); + return RET_ERROR; + } + + is_paff = ASSUME_PAFF_OVER_MBAFF || + !(output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC); + next_frame_same = output->PicInfo.picture_number == + (decoder_status.picNumFlags & ~0x40000000); + interlaced = ((output->PicInfo.flags & + VDEC_FLAG_INTERLACED_SRC) && is_paff) || + next_frame_same || bottom_field || second_field; + + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: next_frame_same: %u | %u | %u\n", + next_frame_same, output->PicInfo.picture_number, + decoder_status.picNumFlags & ~0x40000000); + + if (priv->pic.data[0] && !priv->need_second_field) + avctx->release_buffer(avctx, &priv->pic); + + priv->need_second_field = interlaced && !priv->need_second_field; + + priv->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | + FF_BUFFER_HINTS_REUSABLE; + if (!priv->pic.data[0]) { + if (avctx->get_buffer(avctx, &priv->pic) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return RET_ERROR; + } + } + + bwidth = av_image_get_linesize(avctx->pix_fmt, width, 0); + if (priv->is_70012) { + int pStride; + + if (width <= 720) + pStride = 720; + else if (width <= 1280) + pStride = 1280; + else if (width <= 1080) + pStride = 1080; + sStride = av_image_get_linesize(avctx->pix_fmt, pStride, 0); + } else { + sStride = bwidth; + } + + dStride = priv->pic.linesize[0]; + dst = priv->pic.data[0]; + + av_log(priv->avctx, AV_LOG_VERBOSE, "CrystalHD: Copying out frame\n"); + + if (interlaced) { + int dY = 0; + int sY = 0; + + height /= 2; + if (bottom_field) { + av_log(priv->avctx, AV_LOG_VERBOSE, "Interlaced: bottom field\n"); + dY = 1; + } else { + av_log(priv->avctx, AV_LOG_VERBOSE, "Interlaced: top field\n"); + dY = 0; + } + + for (sY = 0; sY < height; dY++, sY++) { + memcpy(&(dst[dY * dStride]), &(src[sY * sStride]), bwidth); + if (interlaced) + dY++; + } + } else { + av_image_copy_plane(dst, dStride, src, sStride, bwidth, height); + } + + priv->pic.interlaced_frame = interlaced; + if (interlaced) + priv->pic.top_field_first = !bottom_first; + + if (output->PicInfo.timeStamp != 0) { + priv->pic.pkt_pts = opaque_list_pop(priv, output->PicInfo.timeStamp); + av_log(avctx, AV_LOG_VERBOSE, "output \"pts\": %"PRIu64"\n", + priv->pic.pkt_pts); + } + + if (!priv->need_second_field) { + *data_size = sizeof(AVFrame); + *(AVFrame *)data = priv->pic; + } + + if (ASSUME_TWO_INPUTS_ONE_OUTPUT && + output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC) { + av_log(priv->avctx, AV_LOG_VERBOSE, "Fieldpair from two packets.\n"); + return RET_SKIP_NEXT_COPY; + } + + return RET_OK; +} + + +static inline CopyRet receive_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t second_field) +{ + BC_STATUS ret; + BC_DTS_PROC_OUT output = { + .PicInfo.width = avctx->width, + .PicInfo.height = avctx->height, + }; + CHDContext *priv = avctx->priv_data; + HANDLE dev = priv->dev; + + *data_size = 0; + + // Request decoded data from the driver + ret = DtsProcOutputNoCopy(dev, OUTPUT_PROC_TIMEOUT, &output); + if (ret == BC_STS_FMT_CHANGE) { + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Initial format change\n"); + avctx->width = output.PicInfo.width; + avctx->height = output.PicInfo.height; + return RET_COPY_AGAIN; + } else if (ret == BC_STS_SUCCESS) { + int copy_ret = -1; + if (output.PoutFlags & BC_POUT_FLAGS_PIB_VALID) { + if (priv->last_picture == -1) { + /* + * Init to one less, so that the incrementing code doesn't + * need to be special-cased. + */ + priv->last_picture = output.PicInfo.picture_number - 1; + } + + if (avctx->codec->id == CODEC_ID_MPEG4 && + output.PicInfo.timeStamp == 0) { + av_log(avctx, AV_LOG_VERBOSE, + "CrystalHD: Not returning packed frame twice.\n"); + priv->last_picture++; + DtsReleaseOutputBuffs(dev, NULL, FALSE); + return RET_COPY_AGAIN; + } + + print_frame_info(priv, &output); + + if (priv->last_picture + 1 < output.PicInfo.picture_number) { + av_log(avctx, AV_LOG_WARNING, + "CrystalHD: Picture Number discontinuity\n"); + /* + * Have we lost frames? If so, we need to shrink the + * pipeline length appropriately. + * + * XXX: I have no idea what the semantics of this situation + * are so I don't even know if we've lost frames or which + * ones. + * + * In any case, only warn the first time. + */ + priv->last_picture = output.PicInfo.picture_number - 1; + } + + copy_ret = copy_frame(avctx, &output, data, data_size, second_field); + if (*data_size > 0) { + avctx->has_b_frames--; + priv->last_picture++; + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Pipeline length: %u\n", + avctx->has_b_frames); + } + } else { + /* + * An invalid frame has been consumed. + */ + av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput succeeded with " + "invalid PIB\n"); + avctx->has_b_frames--; + copy_ret = RET_OK; + } + DtsReleaseOutputBuffs(dev, NULL, FALSE); + + return copy_ret; + } else if (ret == BC_STS_BUSY) { + return RET_COPY_AGAIN; + } else { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput failed %d\n", ret); + return RET_ERROR; + } +} + + +static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) +{ + BC_STATUS ret; + BC_DTS_STATUS decoder_status; + CopyRet rec_ret; + CHDContext *priv = avctx->priv_data; + HANDLE dev = priv->dev; + int len = avpkt->size; + + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: decode_frame\n"); + + if (len) { + int32_t tx_free = (int32_t)DtsTxFreeSize(dev); + if (len < tx_free - 1024) { + /* + * Despite being notionally opaque, either libcrystalhd or + * the hardware itself will mangle pts values that are too + * small or too large. The docs claim it should be in units + * of 100ns. Given that we're nominally dealing with a black + * box on both sides, any transform we do has no guarantee of + * avoiding mangling so we need to build a mapping to values + * we know will not be mangled. + */ + uint64_t pts = opaque_list_push(priv, avctx->pkt->pts); + if (!pts) { + return AVERROR(ENOMEM); + } + av_log(priv->avctx, AV_LOG_VERBOSE, + "input \"pts\": %"PRIu64"\n", pts); + ret = DtsProcInput(dev, avpkt->data, len, pts, 0); + if (ret == BC_STS_BUSY) { + av_log(avctx, AV_LOG_WARNING, + "CrystalHD: ProcInput returned busy\n"); + usleep(BASE_WAIT); + return AVERROR(EBUSY); + } else if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, + "CrystalHD: ProcInput failed: %u\n", ret); + return -1; + } + avctx->has_b_frames++; + } else { + av_log(avctx, AV_LOG_WARNING, "CrystalHD: Input buffer full\n"); + len = 0; // We didn't consume any bytes. + } + } else { + av_log(avctx, AV_LOG_INFO, "CrystalHD: No more input data\n"); + } + + if (priv->skip_next_output) { + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Skipping next output.\n"); + priv->skip_next_output = 0; + avctx->has_b_frames--; + return len; + } + + ret = DtsGetDriverStatus(dev, &decoder_status); + if (ret != BC_STS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "CrystalHD: GetDriverStatus failed\n"); + return -1; + } + + /* + * No frames ready. Don't try to extract. + * + * Empirical testing shows that ReadyListCount can be a damn lie, + * and ProcOut still fails when count > 0. The same testing showed + * that two more iterations were needed before ProcOutput would + * succeed. + */ + if (priv->output_ready < 2) { + if (decoder_status.ReadyListCount != 0) + priv->output_ready++; + usleep(BASE_WAIT); + av_log(avctx, AV_LOG_INFO, "CrystalHD: Filling pipeline.\n"); + return len; + } else if (decoder_status.ReadyListCount == 0) { + /* + * After the pipeline is established, if we encounter a lack of frames + * that probably means we're not giving the hardware enough time to + * decode them, so start increasing the wait time at the end of a + * decode call. + */ + usleep(BASE_WAIT); + priv->decode_wait += WAIT_UNIT; + av_log(avctx, AV_LOG_INFO, "CrystalHD: No frames ready. Returning\n"); + return len; + } + + do { + rec_ret = receive_frame(avctx, data, data_size, 0); + if (rec_ret == 0 && *data_size == 0) { + if (avctx->codec->id == CODEC_ID_H264) { + /* + * This case is for when the encoded fields are stored + * separately and we get a separate avpkt for each one. To keep + * the pipeline stable, we should return nothing and wait for + * the next time round to grab the second field. + * H.264 PAFF is an example of this. + */ + av_log(avctx, AV_LOG_VERBOSE, "Returning after first field.\n"); + avctx->has_b_frames--; + } else { + /* + * This case is for when the encoded fields are stored in a + * single avpkt but the hardware returns then separately. Unless + * we grab the second field before returning, we'll slip another + * frame in the pipeline and if that happens a lot, we're sunk. + * So we have to get that second field now. + * Interlaced mpeg2 and vc1 are examples of this. + */ + av_log(avctx, AV_LOG_VERBOSE, "Trying to get second field.\n"); + while (1) { + usleep(priv->decode_wait); + ret = DtsGetDriverStatus(dev, &decoder_status); + if (ret == BC_STS_SUCCESS && + decoder_status.ReadyListCount > 0) { + rec_ret = receive_frame(avctx, data, data_size, 1); + if ((rec_ret == 0 && *data_size > 0) || + rec_ret == RET_ERROR) + break; + } + } + av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Got second field.\n"); + } + } else if (rec_ret == RET_SKIP_NEXT_COPY) { + /* + * Two input packets got turned into a field pair. Gawd. + */ + av_log(avctx, AV_LOG_VERBOSE, + "Don't output on next decode call.\n"); + priv->skip_next_output = 1; + } + /* + * If rec_ret == RET_COPY_AGAIN, that means that either we just handled + * a FMT_CHANGE event and need to go around again for the actual frame, + * we got a busy status and need to try again, or we're dealing with + * packed b-frames, where the hardware strangely returns the packed + * p-frame twice. We choose to keep the second copy as it carries the + * valid pts. + */ + } while (rec_ret == RET_COPY_AGAIN); + usleep(priv->decode_wait); + return len; +} + + +#if CONFIG_H264_CRYSTALHD_DECODER +AVCodec ff_h264_crystalhd_decoder = { + .name = "h264_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_H264, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif + +#if CONFIG_MPEG2_CRYSTALHD_DECODER +AVCodec ff_mpeg2_crystalhd_decoder = { + .name = "mpeg2_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_MPEG2VIDEO, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("MPEG-2 Video (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif + +#if CONFIG_MPEG4_CRYSTALHD_DECODER +AVCodec ff_mpeg4_crystalhd_decoder = { + .name = "mpeg4_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_MPEG4, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Part 2 (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif + +#if CONFIG_MSMPEG4_CRYSTALHD_DECODER +AVCodec ff_msmpeg4_crystalhd_decoder = { + .name = "msmpeg4_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_MSMPEG4V3, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Part 2 Microsoft variant version 3 (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif + +#if CONFIG_VC1_CRYSTALHD_DECODER +AVCodec ff_vc1_crystalhd_decoder = { + .name = "vc1_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_VC1, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1 (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif + +#if CONFIG_WMV3_CRYSTALHD_DECODER +AVCodec ff_wmv3_crystalhd_decoder = { + .name = "wmv3_crystalhd", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_WMV3, + .priv_data_size = sizeof(CHDContext), + .init = init, + .close = uninit, + .decode = decode, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, + .flush = flush, + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 (CrystalHD acceleration)"), + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE}, +}; +#endif