From 57f09608e1600d1cf1679885a46f5004d522d68f Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sat, 21 Dec 2013 16:03:59 +0100 Subject: [PATCH] dsputil: Move thirdpel-related bits into their own context --- configure | 3 +- doc/optimization.txt | 3 - libavcodec/Makefile | 1 + libavcodec/dsputil.c | 299 +---------------------------- libavcodec/dsputil.h | 16 -- libavcodec/h264qpel_template.c | 1 + libavcodec/hpel_template.c | 45 ----- libavcodec/hpeldsp_template.c | 1 + libavcodec/svq3.c | 18 +- libavcodec/tpel_template.c | 80 ++++++++ libavcodec/tpeldsp.c | 333 +++++++++++++++++++++++++++++++++ libavcodec/tpeldsp.h | 59 ++++++ 12 files changed, 489 insertions(+), 370 deletions(-) create mode 100644 libavcodec/tpel_template.c create mode 100644 libavcodec/tpeldsp.c create mode 100644 libavcodec/tpeldsp.h diff --git a/configure b/configure index fced12f9bd..f26374a3d3 100755 --- a/configure +++ b/configure @@ -1537,6 +1537,7 @@ CONFIG_EXTRA=" rtpdec rtpenc_chain sinewin + tpeldsp videodsp vp3dsp " @@ -1820,7 +1821,7 @@ sipr_decoder_select="lsp" sp5x_decoder_select="mjpeg_decoder" svq1_decoder_select="hpeldsp" svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" -svq3_decoder_select="h264_decoder hpeldsp" +svq3_decoder_select="h264_decoder hpeldsp tpeldsp" svq3_decoder_suggest="zlib" tak_decoder_select="dsputil" theora_decoder_select="vp3_decoder" diff --git a/doc/optimization.txt b/doc/optimization.txt index 42ad15ed28..b51183fa34 100644 --- a/doc/optimization.txt +++ b/doc/optimization.txt @@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4 Just used to work around a bug in an old libavcodec encoder version. Don't optimize them. -tpel_mc_func {put,avg}_tpel_pixels_tab - Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding. - add_bytes/diff_bytes For huffyuv only, optimize if you want a faster ffhuffyuv codec. 
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index c04b3f1294..3d178a1387 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -65,6 +65,7 @@ OBJS-$(CONFIG_RANGECODER) += rangecoder.o RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) OBJS-$(CONFIG_SINEWIN) += sinewin.o +OBJS-$(CONFIG_TPELDSP) += tpeldsp.o OBJS-$(CONFIG_VAAPI) += vaapi.o OBJS-$(CONFIG_VDPAU) += vdpau.o OBJS-$(CONFIG_VIDEODSP) += videodsp.o diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 9fe6f0b757..b81ba47521 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -48,6 +48,7 @@ uint32_t ff_square_tab[512] = { 0, }; #undef BIT_DEPTH #define BIT_DEPTH 8 +#include "tpel_template.c" #include "dsputil_template.c" // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size @@ -540,284 +541,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, } } -static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - switch (width) { - case 2: - put_pixels2_8_c(dst, src, stride, height); - break; - case 4: - put_pixels4_8_c(dst, src, stride, height); - break; - case 8: - put_pixels8_8_c(dst, src, stride, height); - break; - case 16: - put_pixels16_8_c(dst, src, stride, height); - break; - } -} - -static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((2 * src[j] + src[j + 1] + 1) * - 683) >> 11; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((src[j] + 2 * src[j + 1] + 1) * - 683) >> 11; - src += stride; - dst += stride; - } -} - -static inline void 
put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((2 * src[j] + src[j + stride] + 1) * - 683) >> 11; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((4 * src[j] + 3 * src[j + 1] + - 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) * - 2731) >> 15; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((3 * src[j] + 2 * src[j + 1] + - 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) * - 2731) >> 15; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((src[j] + 2 * src[j + stride] + 1) * - 683) >> 11; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((3 * src[j] + 4 * src[j + 1] + - 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) * - 2731) >> 15; - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = ((2 * src[j] + 3 * src[j + 1] + - 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) * - 2731) >> 15; - src += stride; - dst += stride; - } -} - -static inline void 
avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - switch (width) { - case 2: - avg_pixels2_8_c(dst, src, stride, height); - break; - case 4: - avg_pixels4_8_c(dst, src, stride, height); - break; - case 8: - avg_pixels8_8_c(dst, src, stride, height); - break; - case 16: - avg_pixels16_8_c(dst, src, stride, height); - break; - } -} - -static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((2 * src[j] + src[j + 1] + 1) * - 683) >> 11) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((src[j] + 2 * src[j + 1] + 1) * - 683) >> 11) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((2 * src[j] + src[j + stride] + 1) * - 683) >> 11) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((4 * src[j] + 3 * src[j + 1] + - 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) * - 2731) >> 15) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((3 * src[j] + 2 * src[j + 1] + - 4 * src[j + stride] + 3 * 
src[j + stride + 1] + 6) * - 2731) >> 15) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((src[j] + 2 * src[j + stride] + 1) * - 683) >> 11) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((3 * src[j] + 4 * src[j + 1] + - 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) * - 2731) >> 15) + 1) >> 1; - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, - int stride, int width, int height) -{ - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - dst[j] = (dst[j] + - (((2 * src[j] + 3 * src[j + 1] + - 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) * - 2731) >> 15) + 1) >> 1; - src += stride; - dst += stride; - } -} - #define QPEL_MC(r, OPNAME, RND, OP) \ static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ int dstStride, int srcStride, \ @@ -2781,26 +2504,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->pix_abs[1][2] = pix_abs8_y2_c; c->pix_abs[1][3] = pix_abs8_xy2_c; - c->put_tpel_pixels_tab[0] = put_tpel_pixels_mc00_c; - c->put_tpel_pixels_tab[1] = put_tpel_pixels_mc10_c; - c->put_tpel_pixels_tab[2] = put_tpel_pixels_mc20_c; - c->put_tpel_pixels_tab[4] = put_tpel_pixels_mc01_c; - c->put_tpel_pixels_tab[5] = put_tpel_pixels_mc11_c; - c->put_tpel_pixels_tab[6] = put_tpel_pixels_mc21_c; - c->put_tpel_pixels_tab[8] = put_tpel_pixels_mc02_c; - c->put_tpel_pixels_tab[9] = put_tpel_pixels_mc12_c; - c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; - - c->avg_tpel_pixels_tab[0] = 
avg_tpel_pixels_mc00_c; - c->avg_tpel_pixels_tab[1] = avg_tpel_pixels_mc10_c; - c->avg_tpel_pixels_tab[2] = avg_tpel_pixels_mc20_c; - c->avg_tpel_pixels_tab[4] = avg_tpel_pixels_mc01_c; - c->avg_tpel_pixels_tab[5] = avg_tpel_pixels_mc11_c; - c->avg_tpel_pixels_tab[6] = avg_tpel_pixels_mc21_c; - c->avg_tpel_pixels_tab[8] = avg_tpel_pixels_mc02_c; - c->avg_tpel_pixels_tab[9] = avg_tpel_pixels_mc12_c; - c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; - #define dspfunc(PFX, IDX, NUM) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index d596e29099..052ac50694 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. * h for op_pixels_func is limited to { width / 2, width }, * but never larger than 16 and never smaller than 4. */ -typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */, - const uint8_t *pixels /* align 1 */, - int line_size, int w, int h); typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, uint8_t *src /* align 1 */, ptrdiff_t stride); @@ -188,19 +185,6 @@ typedef struct DSPContext { int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, int size); - /** - * Thirdpel motion compensation with rounding (a + b + 1) >> 1. - * this is an array[12] of motion compensation functions for the - * 9 thirdpel positions
- * *pixels_tab[xthirdpel + 4 * ythirdpel] - * @param block destination where the result is stored - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width? - tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width? - qpel_mc_func put_qpel_pixels_tab[2][16]; qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; diff --git a/libavcodec/h264qpel_template.c b/libavcodec/h264qpel_template.c index 71821798a4..d03b0dc443 100644 --- a/libavcodec/h264qpel_template.c +++ b/libavcodec/h264qpel_template.c @@ -24,6 +24,7 @@ #include "bit_depth_template.c" #include "hpel_template.c" +#include "tpel_template.c" static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) { diff --git a/libavcodec/hpel_template.c b/libavcodec/hpel_template.c index 1bc18ccad0..0a8550a738 100644 --- a/libavcodec/hpel_template.c +++ b/libavcodec/hpel_template.c @@ -22,47 +22,6 @@ #include "pixels.h" #define DEF_HPEL(OPNAME, OP) \ -static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ - const uint8_t *pixels, \ - ptrdiff_t line_size, \ - int h) \ -{ \ - int i; \ - for (i = 0; i < h; i++) { \ - OP(*((pixel2 *) block), AV_RN2P(pixels)); \ - pixels += line_size; \ - block += line_size; \ - } \ -} \ - \ -static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ - const uint8_t *pixels, \ - ptrdiff_t line_size, \ - int h) \ -{ \ - int i; \ - for (i = 0; i < h; i++) { \ - OP(*((pixel4 *) block), AV_RN4P(pixels)); \ - pixels += line_size; \ - block += line_size; \ - } \ -} \ - \ -static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ - const uint8_t *pixels, \ - ptrdiff_t line_size, \ - int h) \ -{ \ - int i; \ - for (i = 0; i < h; i++) { \ - OP(*((pixel4 *) block), AV_RN4P(pixels)); \ - OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ - 
AV_RN4P(pixels + 4 * sizeof(pixel))); \ - pixels += line_size; \ - block += line_size; \ - } \ -} \ - \ static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, \ const uint8_t *src1, \ const uint8_t *src2, \ @@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, \ dst_stride, src_stride1, \ src_stride2, h); \ } \ - \ -CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ - FUNCC(OPNAME ## _pixels8), \ - 8 * sizeof(pixel)) #define op_avg(a, b) a = rnd_avg_pixel4(a, b) #define op_put(a, b) a = b diff --git a/libavcodec/hpeldsp_template.c b/libavcodec/hpeldsp_template.c index f190457b96..3039bfa9d6 100644 --- a/libavcodec/hpeldsp_template.c +++ b/libavcodec/hpeldsp_template.c @@ -33,6 +33,7 @@ #include "bit_depth_template.c" #include "hpel_template.c" +#include "tpel_template.c" #define PIXOP2(OPNAME, OP) \ static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, \ diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 4916314c08..fc2120b2cb 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -54,6 +54,7 @@ #include "golomb.h" #include "hpeldsp.h" #include "rectangle.h" +#include "tpeldsp.h" #if CONFIG_ZLIB #include <zlib.h> @@ -70,6 +71,7 @@ typedef struct { H264Context h; HpelDSPContext hdsp; + TpelDSPContext tdsp; H264Picture *cur_pic; H264Picture *next_pic; H264Picture *last_pic; @@ -321,9 +323,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, src = h->edge_emu_buffer; } if (thirdpel) - (avg ? h->dsp.avg_tpel_pixels_tab - : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, - width, height); + (avg ? s->tdsp.avg_tpel_pixels_tab + : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, + width, height); else (avg ? s->hdsp.avg_pixels_tab : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize, @@ -349,10 +351,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, src = h->edge_emu_buffer; } if (thirdpel) - (avg ? 
h->dsp.avg_tpel_pixels_tab - : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, - h->uvlinesize, - width, height); + (avg ? s->tdsp.avg_tpel_pixels_tab + : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, + h->uvlinesize, + width, height); else (avg ? s->hdsp.avg_pixels_tab : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, @@ -881,6 +883,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) return -1; ff_hpeldsp_init(&s->hdsp, avctx->flags); + ff_tpeldsp_init(&s->tdsp); + h->flags = avctx->flags; h->is_complex = 1; h->picture_structure = PICT_FRAME; diff --git a/libavcodec/tpel_template.c b/libavcodec/tpel_template.c new file mode 100644 index 0000000000..f07679a88f --- /dev/null +++ b/libavcodec/tpel_template.c @@ -0,0 +1,80 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stddef.h> +#include <stdint.h> + +#include "libavutil/intreadwrite.h" +#include "pixels.h" +#include "rnd_avg.h" + +#include "bit_depth_template.c" + +#define DEF_TPEL(OPNAME, OP) \ +static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ + const uint8_t *pixels, \ + ptrdiff_t line_size, \ + int h) \ +{ \ + int i; \ + for (i = 0; i < h; i++) { \ + OP(*((pixel2 *) block), AV_RN2P(pixels)); \ + pixels += line_size; \ + block += line_size; \ + } \ +} \ + \ +static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ + const uint8_t *pixels, \ + ptrdiff_t line_size, \ + int h) \ +{ \ + int i; \ + for (i = 0; i < h; i++) { \ + OP(*((pixel4 *) block), AV_RN4P(pixels)); \ + pixels += line_size; \ + block += line_size; \ + } \ +} \ + \ +static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ + const uint8_t *pixels, \ + ptrdiff_t line_size, \ + int h) \ +{ \ + int i; \ + for (i = 0; i < h; i++) { \ + OP(*((pixel4 *) block), AV_RN4P(pixels)); \ + OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ + AV_RN4P(pixels + 4 * sizeof(pixel))); \ + pixels += line_size; \ + block += line_size; \ + } \ +} \ + \ +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ + FUNCC(OPNAME ## _pixels8), \ + 8 * sizeof(pixel)) + +#define op_avg(a, b) a = rnd_avg_pixel4(a, b) +#define op_put(a, b) a = b + +DEF_TPEL(avg, op_avg) +DEF_TPEL(put, op_put) +#undef op_avg +#undef op_put diff --git a/libavcodec/tpeldsp.c b/libavcodec/tpeldsp.c new file mode 100644 index 0000000000..6a1681311a --- /dev/null +++ b/libavcodec/tpeldsp.c @@ -0,0 +1,333 @@ +/* + * thirdpel DSP functions + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * thirdpel DSP functions + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "tpeldsp.h" + +#define BIT_DEPTH 8 +#include "tpel_template.c" + +static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + switch (width) { + case 2: + put_pixels2_8_c(dst, src, stride, height); + break; + case 4: + put_pixels4_8_c(dst, src, stride, height); + break; + case 8: + put_pixels8_8_c(dst, src, stride, height); + break; + case 16: + put_pixels16_8_c(dst, src, stride, height); + break; + } +} + +static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((2 * src[j] + src[j + 1] + 1) * + 683) >> 11; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((src[j] + 2 * src[j + 1] + 1) * + 683) >> 11; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, + int stride, int 
width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((2 * src[j] + src[j + stride] + 1) * + 683) >> 11; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((4 * src[j] + 3 * src[j + 1] + + 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) * + 2731) >> 15; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((3 * src[j] + 2 * src[j + 1] + + 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) * + 2731) >> 15; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((src[j] + 2 * src[j + stride] + 1) * + 683) >> 11; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((3 * src[j] + 4 * src[j + 1] + + 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) * + 2731) >> 15; + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = ((2 * src[j] + 3 * src[j + 1] + + 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) * + 2731) >> 15; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, + int stride, int 
width, int height) +{ + switch (width) { + case 2: + avg_pixels2_8_c(dst, src, stride, height); + break; + case 4: + avg_pixels4_8_c(dst, src, stride, height); + break; + case 8: + avg_pixels8_8_c(dst, src, stride, height); + break; + case 16: + avg_pixels16_8_c(dst, src, stride, height); + break; + } +} + +static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((2 * src[j] + src[j + 1] + 1) * + 683) >> 11) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((src[j] + 2 * src[j + 1] + 1) * + 683) >> 11) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((2 * src[j] + src[j + stride] + 1) * + 683) >> 11) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((4 * src[j] + 3 * src[j + 1] + + 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) * + 2731) >> 15) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((3 * src[j] + 2 * src[j + 1] + + 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) * + 2731) >> 15) + 1) >> 1; + src += stride; + dst 
+= stride; + } +} + +static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((src[j] + 2 * src[j + stride] + 1) * + 683) >> 11) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((3 * src[j] + 4 * src[j + 1] + + 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) * + 2731) >> 15) + 1) >> 1; + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, + int stride, int width, int height) +{ + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + dst[j] = (dst[j] + + (((2 * src[j] + 3 * src[j + 1] + + 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) * + 2731) >> 15) + 1) >> 1; + src += stride; + dst += stride; + } +} + +av_cold void ff_tpeldsp_init(TpelDSPContext *c) +{ + c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; + c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; + c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; + c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; + c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; + c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; + c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; + c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; + c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; + + c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; + c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; + c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; + c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; + c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; + c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; + c->avg_tpel_pixels_tab[ 8] 
= avg_tpel_pixels_mc02_c; + c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; + c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; +} diff --git a/libavcodec/tpeldsp.h b/libavcodec/tpeldsp.h new file mode 100644 index 0000000000..9c67d60850 --- /dev/null +++ b/libavcodec/tpeldsp.h @@ -0,0 +1,59 @@ +/* + * thirdpel DSP functions + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * thirdpel DSP functions + */ + +#ifndef AVCODEC_TPELDSP_H +#define AVCODEC_TPELDSP_H + +#include <stdint.h> + +/* add and put pixel (decoding) */ +// blocksizes for hpel_pixels_func are 8x4,8x8 16x8 16x16 +// h for hpel_pixels_func is limited to {width/2, width} but never larger +// than 16 and never smaller than 4 +typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */, + const uint8_t *pixels /* align 1 */, + int line_size, int w, int h); + +/** + * thirdpel DSP context + */ +typedef struct TpelDSPContext { + /** + * Thirdpel motion compensation with rounding (a + b + 1) >> 1. + * this is an array[11] of motion compensation functions for the + * 9 thirdpel positions<br>
+ * *pixels_tab[xthirdpel + 4 * ythirdpel] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width? + tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width? +} TpelDSPContext; + +void ff_tpeldsp_init(TpelDSPContext *c); + +#endif /* AVCODEC_TPELDSP_H */