* qatar/master: (30 commits) AVOptions: make default_val a union, as proposed in AVOption2. arm/h264pred: add missing argument type. h264dsp_mmx: place bracket outside #if/#endif block. lavf/utils: fix ff_interleave_compare_dts corner case. fate: add 10-bit H264 tests. h264: do not print "too many references" warning for intra-only. Enable decoding of high bit depth h264. Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. Add support for higher QP values in h264. Add the notion of pixel size in h264 related functions. Make the h264 loop filter bit depth aware. Template dsputil_template.c with respect to pixel size, etc. Template h264idct_template.c with respect to pixel size, etc. Preparatory patch for high bit depth h264 decoding support. Move some functions in dsputil.c into a new file dsputil_template.c. Move the functions in h264idct into a new file h264idct_template.c. Move the functions in h264pred.c into a new file h264pred_template.c. Preparatory patch for high bit depth h264 decoding support. Add pixel formats for 9- and 10-bit yuv420p. Choose h264 chroma dc dequant function dynamically. ... 
Conflicts: doc/APIchanges ffmpeg.c ffplay.c libavcodec/alpha/dsputil_alpha.c libavcodec/arm/dsputil_init_arm.c libavcodec/arm/dsputil_init_armv6.c libavcodec/arm/dsputil_init_neon.c libavcodec/arm/dsputil_iwmmxt.c libavcodec/arm/h264pred_init_arm.c libavcodec/bfin/dsputil_bfin.c libavcodec/dsputil.c libavcodec/h264.c libavcodec/h264.h libavcodec/h264_cabac.c libavcodec/h264_cavlc.c libavcodec/h264_loopfilter.c libavcodec/h264_ps.c libavcodec/h264_refs.c libavcodec/h264dsp.c libavcodec/h264idct.c libavcodec/h264pred.c libavcodec/mlib/dsputil_mlib.c libavcodec/options.c libavcodec/ppc/dsputil_altivec.c libavcodec/ppc/dsputil_ppc.c libavcodec/ppc/h264_altivec.c libavcodec/ps2/dsputil_mmi.c libavcodec/sh4/dsputil_align.c libavcodec/sh4/dsputil_sh4.c libavcodec/sparc/dsputil_vis.c libavcodec/utils.c libavcodec/version.h libavcodec/x86/dsputil_mmx.c libavformat/options.c libavformat/utils.c libavutil/pixfmt.h libswscale/swscale.c libswscale/swscale_internal.h libswscale/swscale_template.c tests/ref/seek/lavf_avi Merged-by: Michael Niedermayer <michaelni@gmx.at>pull/2/head
commit
59eb12faff
56 changed files with 3934 additions and 459 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,313 @@ |
||||
/*
|
||||
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
||||
* Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/**
|
||||
* @file |
||||
* H.264 / AVC / MPEG4 part10 DSP functions. |
||||
* @author Michael Niedermayer <michaelni@gmx.at> |
||||
*/ |
||||
|
||||
#include "high_bit_depth.h" |
||||
|
||||
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) |
||||
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) |
||||
#define H264_WEIGHT(W,H) \ |
||||
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride, int log2_denom, int weight, int offset){ \
|
||||
int y; \
|
||||
pixel *block = (pixel*)_block; \
|
||||
stride /= sizeof(pixel); \
|
||||
offset <<= (log2_denom + (BIT_DEPTH-8)); \
|
||||
if(log2_denom) offset += 1<<(log2_denom-1); \
|
||||
for(y=0; y<H; y++, block += stride){ \
|
||||
op_scale1(0); \
|
||||
op_scale1(1); \
|
||||
if(W==2) continue; \
|
||||
op_scale1(2); \
|
||||
op_scale1(3); \
|
||||
if(W==4) continue; \
|
||||
op_scale1(4); \
|
||||
op_scale1(5); \
|
||||
op_scale1(6); \
|
||||
op_scale1(7); \
|
||||
if(W==8) continue; \
|
||||
op_scale1(8); \
|
||||
op_scale1(9); \
|
||||
op_scale1(10); \
|
||||
op_scale1(11); \
|
||||
op_scale1(12); \
|
||||
op_scale1(13); \
|
||||
op_scale1(14); \
|
||||
op_scale1(15); \
|
||||
} \
|
||||
} \
|
||||
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
|
||||
int y; \
|
||||
pixel *dst = (pixel*)_dst; \
|
||||
pixel *src = (pixel*)_src; \
|
||||
stride /= sizeof(pixel); \
|
||||
offset = ((offset + 1) | 1) << log2_denom; \
|
||||
for(y=0; y<H; y++, dst += stride, src += stride){ \
|
||||
op_scale2(0); \
|
||||
op_scale2(1); \
|
||||
if(W==2) continue; \
|
||||
op_scale2(2); \
|
||||
op_scale2(3); \
|
||||
if(W==4) continue; \
|
||||
op_scale2(4); \
|
||||
op_scale2(5); \
|
||||
op_scale2(6); \
|
||||
op_scale2(7); \
|
||||
if(W==8) continue; \
|
||||
op_scale2(8); \
|
||||
op_scale2(9); \
|
||||
op_scale2(10); \
|
||||
op_scale2(11); \
|
||||
op_scale2(12); \
|
||||
op_scale2(13); \
|
||||
op_scale2(14); \
|
||||
op_scale2(15); \
|
||||
} \
|
||||
} |
||||
|
||||
H264_WEIGHT(16,16) |
||||
H264_WEIGHT(16,8) |
||||
H264_WEIGHT(8,16) |
||||
H264_WEIGHT(8,8) |
||||
H264_WEIGHT(8,4) |
||||
H264_WEIGHT(4,8) |
||||
H264_WEIGHT(4,4) |
||||
H264_WEIGHT(4,2) |
||||
H264_WEIGHT(2,4) |
||||
H264_WEIGHT(2,2) |
||||
|
||||
#undef op_scale1 |
||||
#undef op_scale2 |
||||
#undef H264_WEIGHT |
||||
|
||||
/*
 * Normal (non-intra) luma deblocking filter. xstride steps across the
 * edge, ystride steps along it; inner_iters is 4 for a full edge and 2
 * for an MBAFF half-edge. tc0 carries one clipping value per 4-pixel
 * segment; a negative entry disables filtering for that segment.
 */
static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
{
    pixel *pix = (pixel*)_pix;
    int i, d;
    xstride /= sizeof(pixel);
    ystride /= sizeof(pixel);
    /* thresholds are spec'd for 8-bit; rescale to the compiled depth */
    alpha <<= BIT_DEPTH - 8;
    beta  <<= BIT_DEPTH - 8;
    for( i = 0; i < 4; i++ ) {
        const int tc_orig = tc0[i] << (BIT_DEPTH - 8);
        if( tc_orig < 0 ) {
            pix += inner_iters*ystride;
            continue;
        }
        for( d = 0; d < inner_iters; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc_orig;
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    if(tc_orig)
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    if(tc_orig)
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_pixel( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_pixel( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical edge: walk down the edge, filter across rows. */
static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
}
/* Horizontal edge: strides swapped relative to the vertical case. */
static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
}
/* MBAFF horizontal edge: only half the samples per tc0 segment. */
static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
}
||||
|
||||
/*
 * Strong (intra, bS=4) luma deblocking filter. No tc clipping: when the
 * edge is flat enough the filter rewrites up to three pixels on each
 * side, otherwise it falls back to the weak 2-tap smoothing.
 */
static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
{
    pixel *pix = (pixel*)_pix;
    int d;
    xstride /= sizeof(pixel);
    ystride /= sizeof(pixel);
    alpha <<= BIT_DEPTH - 8;
    beta  <<= BIT_DEPTH - 8;
    for( d = 0; d < 4 * inner_iters; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta);
}
static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
}
static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
}
||||
|
||||
/*
 * Normal (non-intra) chroma deblocking filter. Chroma only touches one
 * pixel on each side of the edge; the tc0 table is rescaled to the
 * compiled bit depth and entries <= 0 skip their segment.
 */
static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
{
    pixel *pix = (pixel*)_pix;
    int i, d;
    xstride /= sizeof(pixel);
    ystride /= sizeof(pixel);
    alpha <<= BIT_DEPTH - 8;
    beta  <<= BIT_DEPTH - 8;
    for( i = 0; i < 4; i++ ) {
        const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1;
        if( tc <= 0 ) {
            pix += inner_iters*ystride;
            continue;
        }
        for( d = 0; d < inner_iters; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_pixel( p0 + delta );    /* p0' */
                pix[0]        = av_clip_pixel( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0);
}
static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
}
static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
}
||||
|
||||
/*
 * Strong (intra) chroma deblocking filter: unconditional 2-tap smoothing
 * of p0/q0 whenever the edge passes the alpha/beta activity test.
 */
static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
{
    pixel *pix = (pixel*)_pix;
    int d;
    xstride /= sizeof(pixel);
    ystride /= sizeof(pixel);
    alpha <<= BIT_DEPTH - 8;
    beta  <<= BIT_DEPTH - 8;
    for( d = 0; d < 4 * inner_iters; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta);
}
static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
}
static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
}
@ -0,0 +1,291 @@ |
||||
/*
|
||||
* H.264 IDCT |
||||
* Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/**
|
||||
* @file |
||||
* H.264 IDCT. |
||||
* @author Michael Niedermayer <michaelni@gmx.at> |
||||
*/ |
||||
|
||||
#include "high_bit_depth.h" |
||||
|
||||
#ifndef AVCODEC_H264IDCT_INTERNAL_H |
||||
#define AVCODEC_H264IDCT_INTERNAL_H |
||||
//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
|
||||
static const uint8_t scan8[16 + 2*4]={ |
||||
4+1*8, 5+1*8, 4+2*8, 5+2*8, |
||||
6+1*8, 7+1*8, 6+2*8, 7+2*8, |
||||
4+3*8, 5+3*8, 4+4*8, 5+4*8, |
||||
6+3*8, 7+3*8, 6+4*8, 7+4*8, |
||||
1+1*8, 2+1*8, |
||||
1+2*8, 2+2*8, |
||||
1+4*8, 2+4*8, |
||||
1+5*8, 2+5*8, |
||||
}; |
||||
#endif |
||||
|
||||
/*
 * Shared 4x4 inverse transform core. block_stride/shift vary between the
 * full-resolution (4, 6) and lowres (8, 3) cases; add selects add-to-dst
 * (1) versus overwrite (0). The +1<<(shift-1) on the DC term implements
 * round-to-nearest for the final >> shift.
 */
static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
    int i;
    INIT_CLIP
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    stride /= sizeof(pixel);

    block[0] += 1<<(shift-1);

    /* horizontal pass, in place */
    for(i=0; i<4; i++){
        const int z0= block[i + block_stride*0] + block[i + block_stride*2];
        const int z1= block[i + block_stride*0] - block[i + block_stride*2];
        const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
        const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);

        block[i + block_stride*0]= z0 + z3;
        block[i + block_stride*1]= z1 + z2;
        block[i + block_stride*2]= z1 - z2;
        block[i + block_stride*3]= z0 - z3;
    }

    /* vertical pass, straight to the destination */
    for(i=0; i<4; i++){
        const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
        const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
        const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
        const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);

        dst[i + 0*stride]= CLIP(add*dst[i + 0*stride] + ((z0 + z3) >> shift));
        dst[i + 1*stride]= CLIP(add*dst[i + 1*stride] + ((z1 + z2) >> shift));
        dst[i + 2*stride]= CLIP(add*dst[i + 2*stride] + ((z1 - z2) >> shift));
        dst[i + 3*stride]= CLIP(add*dst[i + 3*stride] + ((z0 - z3) >> shift));
    }
}

void FUNCC(ff_h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride){
    FUNCC(idct_internal)(dst, block, stride, 4, 6, 1);
}

void FUNCC(ff_h264_lowres_idct_add)(uint8_t *dst, int stride, DCTELEM *block){
    FUNCC(idct_internal)(dst, block, stride, 8, 3, 1);
}

void FUNCC(ff_h264_lowres_idct_put)(uint8_t *dst, int stride, DCTELEM *block){
    FUNCC(idct_internal)(dst, block, stride, 8, 3, 0);
}
||||
|
||||
/*
 * 8x8 inverse transform, added to dst. Two separable passes: the first
 * works in place on the coefficient block, the second writes clipped
 * results to the destination with a fixed >> 6 (hence block[0] += 32
 * for rounding).
 */
void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
    int i;
    INIT_CLIP
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    stride /= sizeof(pixel);

    block[0] += 32;

    for( i = 0; i < 8; i++ )
    {
        const int a0 =  block[i+0*8] + block[i+4*8];
        const int a2 =  block[i+0*8] - block[i+4*8];
        const int a4 = (block[i+2*8]>>1) - block[i+6*8];
        const int a6 = (block[i+6*8]>>1) + block[i+2*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
        const int a3 =  block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
        const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
        const int a7 =  block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        block[i+0*8] = b0 + b7;
        block[i+7*8] = b0 - b7;
        block[i+1*8] = b2 + b5;
        block[i+6*8] = b2 - b5;
        block[i+2*8] = b4 + b3;
        block[i+5*8] = b4 - b3;
        block[i+3*8] = b6 + b1;
        block[i+4*8] = b6 - b1;
    }
    for( i = 0; i < 8; i++ )
    {
        const int a0 =  block[0+i*8] + block[4+i*8];
        const int a2 =  block[0+i*8] - block[4+i*8];
        const int a4 = (block[2+i*8]>>1) - block[6+i*8];
        const int a6 = (block[6+i*8]>>1) + block[2+i*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
        const int a3 =  block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
        const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
        const int a7 =  block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
        dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
        dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
        dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
        dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
        dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
        dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
        dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
    }
}
||||
|
||||
// assumes all AC coefs are 0
|
||||
void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ |
||||
int i, j; |
||||
int dc = (((dctcoef*)block)[0] + 32) >> 6; |
||||
INIT_CLIP |
||||
pixel *dst = (pixel*)_dst; |
||||
stride /= sizeof(pixel); |
||||
for( j = 0; j < 4; j++ ) |
||||
{ |
||||
for( i = 0; i < 4; i++ ) |
||||
dst[i] = CLIP( dst[i] + dc ); |
||||
dst += stride; |
||||
} |
||||
} |
||||
|
||||
void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ |
||||
int i, j; |
||||
int dc = (((dctcoef*)block)[0] + 32) >> 6; |
||||
INIT_CLIP |
||||
pixel *dst = (pixel*)_dst; |
||||
stride /= sizeof(pixel); |
||||
for( j = 0; j < 8; j++ ) |
||||
{ |
||||
for( i = 0; i < 8; i++ ) |
||||
dst[i] = CLIP( dst[i] + dc ); |
||||
dst += stride; |
||||
} |
||||
} |
||||
|
||||
/*
 * Batched add helpers. nnzc (indexed through scan8) selects the cheapest
 * transform per sub-block: DC-only fast path when only the DC coef is
 * set, full IDCT otherwise. The byte offset i*16*sizeof(pixel) steps
 * through the per-depth coefficient layout.
 */
void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int i;
    for(i=0; i<16; i++){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
            else                                  FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
        }
    }
}

void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int i;
    for(i=0; i<16; i++){
        if(nnzc[ scan8[i] ])             FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
        else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
    }
}

void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int i;
    for(i=0; i<16; i+=4){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
            else                                  FUNCC(ff_h264_idct8_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }
}

void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int i;
    for(i=16; i<16+8; i++){
        if(nnzc[ scan8[i] ])
            FUNCC(ff_h264_idct_add   )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
        else if(((dctcoef*)block)[i*16])
            FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
    }
}
||||
/**
|
||||
* IDCT transforms the 16 dc values and dequantizes them. |
||||
* @param qp quantization parameter |
||||
*/ |
||||
void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){ |
||||
#define stride 16 |
||||
int i; |
||||
int temp[16]; |
||||
static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride}; |
||||
dctcoef *input = (dctcoef*)_input; |
||||
dctcoef *output = (dctcoef*)_output; |
||||
|
||||
for(i=0; i<4; i++){ |
||||
const int z0= input[4*i+0] + input[4*i+1]; |
||||
const int z1= input[4*i+0] - input[4*i+1]; |
||||
const int z2= input[4*i+2] - input[4*i+3]; |
||||
const int z3= input[4*i+2] + input[4*i+3]; |
||||
|
||||
temp[4*i+0]= z0+z3; |
||||
temp[4*i+1]= z0-z3; |
||||
temp[4*i+2]= z1-z2; |
||||
temp[4*i+3]= z1+z2; |
||||
} |
||||
|
||||
for(i=0; i<4; i++){ |
||||
const int offset= x_offset[i]; |
||||
const int z0= temp[4*0+i] + temp[4*2+i]; |
||||
const int z1= temp[4*0+i] - temp[4*2+i]; |
||||
const int z2= temp[4*1+i] - temp[4*3+i]; |
||||
const int z3= temp[4*1+i] + temp[4*3+i]; |
||||
|
||||
output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); |
||||
output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); |
||||
output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); |
||||
output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); |
||||
} |
||||
#undef stride |
||||
} |
||||
|
||||
/* 2x2 chroma DC transform + dequantization, done in place on the four
 * DC positions of the coefficient block. */
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;
    dctcoef *block = (dctcoef*)_block;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* 2x2 Hadamard butterfly */
    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
}
@ -0,0 +1,975 @@ |
||||
/*
|
||||
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
||||
* Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/**
|
||||
* @file |
||||
* H.264 / AVC / MPEG4 part10 prediction functions. |
||||
* @author Michael Niedermayer <michaelni@gmx.at> |
||||
*/ |
||||
|
||||
#include "mathops.h" |
||||
#include "high_bit_depth.h" |
||||
|
||||
/* 4x4 vertical prediction: replicate the row above into all four rows. */
static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a= ((pixel4*)(src-stride))[0];
    ((pixel4*)(src+0*stride))[0]= a;
    ((pixel4*)(src+1*stride))[0]= a;
    ((pixel4*)(src+2*stride))[0]= a;
    ((pixel4*)(src+3*stride))[0]= a;
}

/* 4x4 horizontal prediction: splat each left-column pixel across its row. */
static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]);
    ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]);
    ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]);
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]);
}

/* DC prediction: average of the 4 top and 4 left neighbours. */
static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}

/* DC prediction when only the left neighbours are available. */
static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}

/* DC prediction when only the top neighbours are available. */
static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}

/* No neighbours: fill with mid-grey (128 at 8 bits, scaled for depth). */
static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
}

/* Fill with mid-grey minus one (used by SVQ3/RV40-style edge handling). */
static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
}

/* Fill with mid-grey plus one. */
static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
}
||||
|
||||
|
||||
/* Neighbour-loading helpers shared by the directional predictors.
 * Each pulls four reference samples into av_unused locals (t* = top row,
 * l* = left column) so individual predictors can use any subset. */
#define LOAD_TOP_RIGHT_EDGE\
    const int av_unused t4= topright[0];\
    const int av_unused t5= topright[1];\
    const int av_unused t6= topright[2];\
    const int av_unused t7= topright[3];\

#define LOAD_DOWN_LEFT_EDGE\
    const int av_unused l4= src[-1+4*stride];\
    const int av_unused l5= src[-1+5*stride];\
    const int av_unused l6= src[-1+6*stride];\
    const int av_unused l7= src[-1+7*stride];\

#define LOAD_LEFT_EDGE\
    const int av_unused l0= src[-1+0*stride];\
    const int av_unused l1= src[-1+1*stride];\
    const int av_unused l2= src[-1+2*stride];\
    const int av_unused l3= src[-1+3*stride];\

#define LOAD_TOP_EDGE\
    const int av_unused t0= src[ 0-1*stride];\
    const int av_unused t1= src[ 1-1*stride];\
    const int av_unused t2= src[ 2-1*stride];\
    const int av_unused t3= src[ 3-1*stride];\
|
||||
|
||||
/* Diagonal down-right prediction: 3-tap filtered diagonals built from the
 * top row, left column and the top-left corner sample (lt). */
static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
||||
|
||||
/* Diagonal down-left prediction: built from the top and top-right rows
 * only; the last sample uses the t7 replication rule from the spec. */
static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride/sizeof(pixel);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE
//    LOAD_LEFT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}
||||
|
||||
/* 4x4 vertical-right intra prediction (H.264 Intra_4x4 mode 5).
 * topright is unused in this mode. */
static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    /* even rows: 2-tap averages; odd rows: 3-tap filtered values */
    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
/* 4x4 vertical-left intra prediction (H.264 Intra_4x4 mode 7). */
static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride/sizeof(pixel);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
/* 4x4 horizontal-up intra prediction (H.264 Intra_4x4 mode 8).
 * Uses left neighbours only; topright is unused. */
static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    LOAD_LEFT_EDGE

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;   /* == (l2 + 3*l3 + 2)>>2 */
    /* everything below the l3 diagonal is replicated from l3 */
    src[3+2*stride]=
    src[1+3*stride]=
    src[0+3*stride]=
    src[2+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
/* 4x4 horizontal-down intra prediction (H.264 Intra_4x4 mode 6).
 * topright is unused in this mode. */
static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
/* 16x16 vertical intra prediction: replicate the row above into all 16 rows.
 * The row is copied as four pixel4 words per line for speed. */
static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = ((pixel4*)(src-stride))[0];
    const pixel4 b = ((pixel4*)(src-stride))[1];
    const pixel4 c = ((pixel4*)(src-stride))[2];
    const pixel4 d = ((pixel4*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((pixel4*)(src+i*stride))[0] = a;
        ((pixel4*)(src+i*stride))[1] = b;
        ((pixel4*)(src+i*stride))[2] = c;
        ((pixel4*)(src+i*stride))[3] = d;
    }
}
/* 16x16 horizontal intra prediction: each row is filled with its left
 * neighbour, splatted across four pixel4 words. */
static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
    int i;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    for(i=0; i<16; i++){
        ((pixel4*)(src+i*stride))[0] =
        ((pixel4*)(src+i*stride))[1] =
        ((pixel4*)(src+i*stride))[2] =
        ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
    }
}
/* Fill a 16x16 block with the splatted DC value v.
 * Requires `i`, `src` and `stride` in the caller's scope; advances src. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4P(src+ 0, v);\
        AV_WN4P(src+ 4, v);\
        AV_WN4P(src+ 8, v);\
        AV_WN4P(src+12, v);\
        src += stride;\
    }
/* 16x16 DC intra prediction: average of the 16 left and 16 top neighbours,
 * rounded ((sum+16)>>5), splatted over the whole block. */
static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];   /* left column */
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];      /* top row */
    }

    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
    PREDICT_16x16_DC(dcsplat);
}
/* 16x16 left-DC prediction: only the left neighbours are available,
 * so the DC is their average ((sum+8)>>4). */
static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
/* 16x16 top-DC prediction: only the top neighbours are available,
 * so the DC is their average ((sum+8)>>4). */
static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
#define PRED16x16_X(n, v) \ |
||||
static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
|
||||
int i;\
|
||||
pixel *src = (pixel*)_src;\
|
||||
stride /= sizeof(pixel);\
|
||||
PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
|
||||
} |
||||
|
||||
PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1); |
||||
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0); |
||||
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1); |
||||
|
||||
static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){ |
||||
int i, j, k; |
||||
int a; |
||||
INIT_CLIP |
||||
pixel *src = (pixel*)_src; |
||||
int stride = _stride/sizeof(pixel); |
||||
const pixel * const src0 = src +7-stride; |
||||
const pixel * src1 = src +8*stride-1; |
||||
const pixel * src2 = src1-2*stride; // == src+6*stride-1;
|
||||
int H = src0[1] - src0[-1]; |
||||
int V = src1[0] - src2[ 0]; |
||||
for(k=2; k<=8; ++k) { |
||||
src1 += stride; src2 -= stride; |
||||
H += k*(src0[k] - src0[-k]); |
||||
V += k*(src1[0] - src2[ 0]); |
||||
} |
||||
if(svq3){ |
||||
H = ( 5*(H/4) ) / 16; |
||||
V = ( 5*(V/4) ) / 16; |
||||
|
||||
/* required for 100% accuracy */ |
||||
i = H; H = V; V = i; |
||||
}else if(rv40){ |
||||
H = ( H + (H>>2) ) >> 4; |
||||
V = ( V + (V>>2) ) >> 4; |
||||
}else{ |
||||
H = ( 5*H+32 ) >> 6; |
||||
V = ( 5*V+32 ) >> 6; |
||||
} |
||||
|
||||
a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); |
||||
for(j=16; j>0; --j) { |
||||
int b = a; |
||||
a += V; |
||||
for(i=-16; i<0; i+=4) { |
||||
src[16+i] = CLIP((b ) >> 5); |
||||
src[17+i] = CLIP((b+ H) >> 5); |
||||
src[18+i] = CLIP((b+2*H) >> 5); |
||||
src[19+i] = CLIP((b+3*H) >> 5); |
||||
b += 4*H; |
||||
} |
||||
src += stride; |
||||
} |
||||
} |
||||
|
||||
/* Standard H.264 16x16 plane prediction (no codec compatibility tweaks). */
static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
}
/* 8x8 vertical intra prediction: replicate the row above into all 8 rows. */
static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a= ((pixel4*)(src-stride))[0];
    const pixel4 b= ((pixel4*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= a;
        ((pixel4*)(src+i*stride))[1]= b;
    }
}
/* 8x8 horizontal intra prediction: each row filled with its left neighbour. */
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
    int i;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
    }
}
#define PRED8x8_X(n, v)\ |
||||
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
|
||||
int i;\
|
||||
pixel *src = (pixel*)_src;\
|
||||
stride /= sizeof(pixel);\
|
||||
for(i=0; i<8; i++){\
|
||||
((pixel4*)(src+i*stride))[0]=\
|
||||
((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
|
||||
}\
|
||||
} |
||||
|
||||
PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); |
||||
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); |
||||
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); |
||||
|
||||
/* 8x8 chroma left-DC prediction: top and bottom halves each take the DC of
 * their own four left neighbours. */
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc2;
    pixel4 dc0splat, dc2splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];       /* rows 0-3 */
        dc2+= src[-1+(i+4)*stride];   /* rows 4-7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc2splat;
    }
}
/* 8x8 chroma top-DC prediction: left and right halves each take the DC of
 * their own four top neighbours, applied to all 8 rows. */
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];     /* columns 0-3 */
        dc1+= src[4+i-stride];   /* columns 4-7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
}
/* 8x8 chroma DC prediction: four quadrant DCs per the H.264 chroma rules —
 * top-left averages left+top, top-right averages top only, bottom-left
 * averages left only, bottom-right mixes the two partial sums. */
static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc1, dc2;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];   /* left rows 0-3 + top cols 0-3 */
        dc1+= src[4+i-stride];                    /* top cols 4-7 */
        dc2+= src[-1+(i+4)*stride];               /* left rows 4-7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= dc2splat;
        ((pixel4*)(src+i*stride))[1]= dc3splat;
    }
}
||||
//the following 4 functions should not be optimized!
/* "Mad cow" DC: top DC over the whole block, then full DC for the
 * top-left 4x4 (left+top available there). */
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
    FUNCC(pred8x8_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
/* "Mad cow" DC: full 8x8 DC, then top-only DC for the top-left 4x4. */
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
    FUNCC(pred8x8_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
/* "Mad cow" DC: left DC overall, bottom half falls back to the mid-grey
 * 128-DC (no neighbours usable there). Offsets are in bytes, hence the
 * sizeof(pixel) scaling on the horizontal step. */
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
/* "Mad cow" DC: left DC overall, top half falls back to the mid-grey
 * 128-DC (no neighbours usable there). */
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
/* 8x8 chroma plane intra prediction: least-squares-style H/V gradients from
 * the edge samples, then a clipped linear ramp over the block. */
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
    int j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel * const src0 = src +3-stride;
    const pixel *       src1 = src +4*stride-1;
    const pixel *       src2 = src1-2*stride;    // == src+2*stride-1;
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    for(k=2; k<=4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    H = ( 17*H+16 ) >> 5;
    V = ( 17*V+16 ) >> 5;

    a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
    for(j=8; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}
/* Pixel accessor in pixel units, relative to the 8x8 block origin. */
#define SRC(x,y) src[(x)+(y)*stride]

/* l0..l7: 3-tap filtered left edge, per H.264 8x8 intra reference-sample
 * filtering; the first/last taps fall back when neighbours are missing. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* t0..t7: 3-tap filtered top edge. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* t8..t15: filtered top-right edge; replicated from t7 when unavailable. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* lt: filtered top-left corner sample. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fill the 8x8 block with the splatted DC value v; advances src. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((pixel4*)src)[0] = \
        ((pixel4*)src)[1] = v; \
        src += stride; \
    }
/* 8x8 luma fixed mid-grey DC (no neighbours available). */
static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
}
/* 8x8 luma left-DC: average of the eight filtered left-edge samples. */
static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_LEFT;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
/* 8x8 luma top-DC: average of the eight filtered top-edge samples. */
static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
/* 8x8 luma DC: average of all sixteen filtered left+top edge samples. */
static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
    PREDICT_8x8_DC(dc);
}
/* 8x8 luma horizontal prediction: each row filled with its filtered left
 * neighbour. */
static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_LEFT;
#define ROW(y) ((pixel4*)(src+y*stride))[0] =\
               ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
/* 8x8 luma vertical prediction: write the filtered top edge into row 0,
 * then copy row 0 into rows 1-7. */
static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    int y;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_TOP;
    src[0] = t0;
    src[1] = t1;
    src[2] = t2;
    src[3] = t3;
    src[4] = t4;
    src[5] = t5;
    src[6] = t6;
    src[7] = t7;
    for( y = 1; y < 8; y++ ) {
        ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
        ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
    }
}
/* 8x8 luma diagonal down-left prediction: each anti-diagonal takes one
 * 3-tap value from the filtered top/top-right edges. */
static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}
/* 8x8 luma diagonal down-right prediction from the filtered left, top and
 * top-left edges. */
static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
/* 8x8 luma vertical-right prediction: 2-tap averages on even diagonals,
 * 3-tap filtered values on odd diagonals. */
static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
}
/* 8x8 luma horizontal-down prediction: 2-tap averages and 3-tap filtered
 * values alternating along each diagonal. */
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
/* 8x8 luma vertical-left prediction from the filtered top and top-right
 * edges: 2-tap averages on even rows, 3-tap values on odd rows. */
static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
/* 8x8 luma horizontal-up prediction from the filtered left edge; the
 * lower-right region is replicated from l7. */
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_LEFT;
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}
/* The 8x8 helper macros are only needed above; undefine them so this
 * template can be re-included for other bit depths without clashes. */
#undef PREDICT_8x8_LOAD_LEFT
#undef PREDICT_8x8_LOAD_TOP
#undef PREDICT_8x8_LOAD_TOPLEFT
#undef PREDICT_8x8_LOAD_TOPRIGHT
#undef PREDICT_8x8_DC
#undef PTR
#undef PT
#undef PL
#undef SRC
/* Add a 4x4 residual column-wise, propagating each column's running sum
 * downward from the pixel above the block (vertical DC-prediction add).
 * block rows are 4 coefficients apart. */
static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    pix -= stride;   /* start from the row above the block */
    for(i=0; i<4; i++){
        pixel v = pix[0];
        pix[1*stride]= v += block[0];
        pix[2*stride]= v += block[4];
        pix[3*stride]= v += block[8];
        pix[4*stride]= v +  block[12];
        pix++;
        block++;
    }
}
/* Add a 4x4 residual row-wise, propagating each row's running sum rightward
 * from the pixel left of the block (horizontal DC-prediction add). */
static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    for(i=0; i<4; i++){
        pixel v = pix[-1];
        pix[0]= v += block[0];
        pix[1]= v += block[1];
        pix[2]= v += block[2];
        pix[3]= v +  block[3];
        pix+= stride;
        block+= 4;
    }
}
/* 8x8 variant of pred4x4_vertical_add: residual rows are 8 coefficients
 * apart. */
static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    pix -= stride;
    for(i=0; i<8; i++){
        pixel v = pix[0];
        pix[1*stride]= v += block[0];
        pix[2*stride]= v += block[8];
        pix[3*stride]= v += block[16];
        pix[4*stride]= v += block[24];
        pix[5*stride]= v += block[32];
        pix[6*stride]= v += block[40];
        pix[7*stride]= v += block[48];
        pix[8*stride]= v +  block[56];
        pix++;
        block++;
    }
}
/* 8x8 variant of pred4x4_horizontal_add. */
static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    for(i=0; i<8; i++){
        pixel v = pix[-1];
        pix[0]= v += block[0];
        pix[1]= v += block[1];
        pix[2]= v += block[2];
        pix[3]= v += block[3];
        pix[4]= v += block[4];
        pix[5]= v += block[5];
        pix[6]= v += block[6];
        pix[7]= v +  block[7];
        pix+= stride;
        block+= 8;
    }
}
/* Apply vertical residual-add to all sixteen 4x4 sub-blocks of a 16x16
 * block; block_offset gives each sub-block's byte offset within pix. */
static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<16; i++)
        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
/* Apply horizontal residual-add to all sixteen 4x4 sub-blocks of a 16x16
 * block. */
static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<16; i++)
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
/* Apply vertical residual-add to the four 4x4 sub-blocks of an 8x8 block. */
static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<4; i++)
        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
/* Apply horizontal residual-add to the four 4x4 sub-blocks of an 8x8 block. */
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<4; i++)
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
@ -0,0 +1,85 @@ |
||||
#include "dsputil.h"

#ifndef BIT_DEPTH
#define BIT_DEPTH 8
#endif

/* This header is include-once per bit depth: on re-inclusion the previous
 * depth's definitions are removed so they can be redefined below. */
#ifdef AVCODEC_H264_HIGH_DEPTH_H
#   undef pixel
#   undef pixel2
#   undef pixel4
#   undef dctcoef
#   undef INIT_CLIP
#   undef no_rnd_avg_pixel4
#   undef rnd_avg_pixel4
#   undef AV_RN2P
#   undef AV_RN4P
#   undef AV_WN2P
#   undef AV_WN4P
#   undef AV_WN4PA
#   undef CLIP
#   undef FUNC
#   undef FUNCC
#   undef av_clip_pixel
#   undef PIXEL_SPLAT_X4
#else
#   define AVCODEC_H264_HIGH_DEPTH_H

/* Branch-free clamp of p to [0, (1<<depth)-1]: out-of-range values map to
 * 0 for negatives and pixel_max for overflows. */
#   define CLIP_PIXEL(depth)\
    static inline uint16_t av_clip_pixel_ ## depth (int p)\
    {\
        const int pixel_max = (1 << depth)-1;\
        return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
    }

CLIP_PIXEL( 9)
CLIP_PIXEL(10)
#endif

#if BIT_DEPTH > 8
#   define pixel  uint16_t
#   define pixel2 uint32_t
#   define pixel4 uint64_t
#   define dctcoef int32_t

#   define INIT_CLIP
#   define no_rnd_avg_pixel4 no_rnd_avg64
#   define rnd_avg_pixel4    rnd_avg64
#   define AV_RN2P  AV_RN32
#   define AV_RN4P  AV_RN64
#   define AV_WN2P  AV_WN32
#   define AV_WN4P  AV_WN64
#   define AV_WN4PA AV_WN64A
#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#else
#   define pixel  uint8_t
#   define pixel2 uint16_t
#   define pixel4 uint32_t
#   define dctcoef int16_t

#   define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
#   define no_rnd_avg_pixel4 no_rnd_avg32
#   define rnd_avg_pixel4    rnd_avg32
#   define AV_RN2P  AV_RN16
#   define AV_RN4P  AV_RN32
#   define AV_WN2P  AV_WN16
#   define AV_WN4P  AV_WN32
#   define AV_WN4PA AV_WN32A
#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#endif

/* Per-depth function-name suffixes and pixel clipping. */
#if BIT_DEPTH == 8
#   define av_clip_pixel(a) av_clip_uint8(a)
#   define CLIP(a) cm[a]
#   define FUNC(a)  a ## _8
#   define FUNCC(a) a ## _8_c
#elif BIT_DEPTH == 9
#   define av_clip_pixel(a) av_clip_pixel_9(a)
#   define CLIP(a)          av_clip_pixel_9(a)
#   define FUNC(a)  a ## _9
#   define FUNCC(a) a ## _9_c
#elif BIT_DEPTH == 10
#   define av_clip_pixel(a) av_clip_pixel_10(a)
#   define CLIP(a)          av_clip_pixel_10(a)
#   define FUNC(a)  a ## _10
#   define FUNCC(a) a ## _10_c
#endif
@ -0,0 +1,10 @@ |
||||
0, 0, 2764800, 0xcc4df07d |
||||
0, 3600, 2764800, 0x85f9e6d4 |
||||
0, 7200, 2764800, 0x23ffe90d |
||||
0, 10800, 2764800, 0xf0a6d453 |
||||
0, 14400, 2764800, 0x913a6392 |
||||
0, 18000, 2764800, 0xcc5f9736 |
||||
0, 21600, 2764800, 0x43f9f9ce |
||||
0, 25200, 2764800, 0xc874b44e |
||||
0, 28800, 2764800, 0x83b665e6 |
||||
0, 32400, 2764800, 0x5ea2e31e |
@ -0,0 +1,10 @@ |
||||
0, 0, 2764800, 0x4f710132 |
||||
0, 3600, 2764800, 0x57e5b713 |
||||
0, 7200, 2764800, 0xcca01477 |
||||
0, 10800, 2764800, 0xa19a95cd |
||||
0, 14400, 2764800, 0x700a757d |
||||
0, 18000, 2764800, 0xd8c6f60f |
||||
0, 21600, 2764800, 0x95a1bbc7 |
||||
0, 25200, 2764800, 0x0582077a |
||||
0, 28800, 2764800, 0x91595f91 |
||||
0, 32400, 2764800, 0xf5fe034a |
@ -0,0 +1,10 @@ |
||||
0, 0, 2764800, 0xda69f69e |
||||
0, 3600, 2764800, 0x29ed832f |
||||
0, 7200, 2764800, 0xb3244cc4 |
||||
0, 10800, 2764800, 0xe41a312c |
||||
0, 14400, 2764800, 0xac0b344b |
||||
0, 18000, 2764800, 0xc585aa20 |
||||
0, 21600, 2764800, 0x0952054c |
||||
0, 25200, 2764800, 0xd1a02f87 |
||||
0, 28800, 2764800, 0xfcbfe87c |
||||
0, 32400, 2764800, 0xe4e9b8a2 |
@ -0,0 +1,19 @@ |
||||
0, 0, 6220800, 0xca2a2a5e |
||||
0, 3600, 6220800, 0x8009a65e |
||||
0, 7200, 6220800, 0x63e72b3b |
||||
0, 10800, 6220800, 0x7459a1cc |
||||
0, 14400, 6220800, 0x02191aa9 |
||||
0, 18000, 6220800, 0x88dca590 |
||||
0, 21600, 6220800, 0x56dd150a |
||||
0, 25200, 6220800, 0x5f56a56f |
||||
0, 28800, 6220800, 0x67ada4b7 |
||||
0, 32400, 6220800, 0x88dca590 |
||||
0, 36000, 6220800, 0xd3b09fe5 |
||||
0, 39600, 6220800, 0x2223998c |
||||
0, 43200, 6220800, 0x5e5b2da5 |
||||
0, 46800, 6220800, 0x88dca590 |
||||
0, 50400, 6220800, 0x5e5b2da5 |
||||
0, 54000, 6220800, 0x88dca590 |
||||
0, 57600, 6220800, 0x5e5b2da5 |
||||
0, 61200, 6220800, 0x88dca590 |
||||
0, 64800, 6220800, 0x26e1ec8b |
@ -0,0 +1,10 @@ |
||||
0, 0, 6220800, 0x1df58ce9 |
||||
0, 3600, 6220800, 0x8f2859ce |
||||
0, 7200, 6220800, 0x229cc7ff |
||||
0, 10800, 6220800, 0x73e86984 |
||||
0, 14400, 6220800, 0xb6d4504b |
||||
0, 18000, 6220800, 0x4e7d4883 |
||||
0, 21600, 6220800, 0xbec3f0f7 |
||||
0, 25200, 6220800, 0x1d9af065 |
||||
0, 28800, 6220800, 0x44851549 |
||||
0, 32400, 6220800, 0xfcf8728e |
@ -0,0 +1,10 @@ |
||||
0, 0, 6220800, 0x408daf70 |
||||
0, 3600, 6220800, 0x59b254a3 |
||||
0, 7200, 6220800, 0x4cf4279c |
||||
0, 10800, 6220800, 0x5c9437ae |
||||
0, 14400, 6220800, 0x986c3eb8 |
||||
0, 18000, 6220800, 0x23fd883e |
||||
0, 21600, 6220800, 0x84f222fe |
||||
0, 25200, 6220800, 0xe7f91107 |
||||
0, 28800, 6220800, 0xb544b31e |
||||
0, 32400, 6220800, 0x1ebdde56 |
@ -0,0 +1,10 @@ |
||||
0, 0, 6220800, 0xf81873fe |
||||
0, 3600, 6220800, 0x7b96fbdc |
||||
0, 7200, 6220800, 0x75dbafc4 |
||||
0, 10800, 6220800, 0x7524301e |
||||
0, 14400, 6220800, 0x0f3621ab |
||||
0, 18000, 6220800, 0xa5e25b35 |
||||
0, 21600, 6220800, 0x063a8116 |
||||
0, 25200, 6220800, 0x48ebc8ff |
||||
0, 28800, 6220800, 0x1f635df8 |
||||
0, 32400, 6220800, 0xe282c8bd |
Loading…
Reference in new issue