From 05aec7bb87236bf6e7c0f61fb6b20c5c922b49e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= Date: Sun, 14 Mar 2010 17:50:12 +0000 Subject: [PATCH] Separate DWT from snow and dsputil This moves the DWT functions from snow.c and dsputil.c to a file of their own. A new struct, DWTContext, holds the function pointers previously part of DSPContext. Originally committed as revision 22522 to svn://svn.ffmpeg.org/ffmpeg/trunk --- configure | 4 +- libavcodec/Makefile | 3 +- libavcodec/dsputil.c | 112 +---- libavcodec/dsputil.h | 12 +- libavcodec/dwt.c | 843 +++++++++++++++++++++++++++++++++++ libavcodec/dwt.h | 156 +++++++ libavcodec/ivi_dsp.c | 1 + libavcodec/snow.c | 728 +----------------------------- libavcodec/snow.h | 97 +--- libavcodec/x86/dsputil_mmx.c | 19 - libavcodec/x86/dsputil_mmx.h | 9 - libavcodec/x86/snowdsp_mmx.c | 37 +- 12 files changed, 1046 insertions(+), 975 deletions(-) create mode 100644 libavcodec/dwt.c create mode 100644 libavcodec/dwt.h diff --git a/configure b/configure index 60834ca0fb..7262c25948 100755 --- a/configure +++ b/configure @@ -894,6 +894,7 @@ CONFIG_LIST=" bzlib dct doc + dwt dxva2 fastdiv ffmpeg @@ -1276,7 +1277,8 @@ rv30_decoder_select="golomb" rv40_decoder_select="golomb" shorten_decoder_select="golomb" sipr_decoder_select="lsp" -snow_encoder_select="aandct" +snow_decoder_select="dwt" +snow_encoder_select="aandct dwt" sonic_decoder_select="golomb" sonic_encoder_select="golomb" sonic_ls_encoder_select="golomb" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2253e29cbb..47ebcb282b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -28,6 +28,7 @@ OBJS = allcodecs.o \ OBJS-$(CONFIG_AANDCT) += aandcttab.o OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o OBJS-$(CONFIG_DCT) += dct.o +OBJS-$(CONFIG_DWT) += dwt.o OBJS-$(CONFIG_DXVA2) += dxva2.o FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o OBJS-$(CONFIG_FFT) += avfft.o fft.o $(FFT-OBJS-yes) @@ -598,7 +599,7 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o -MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp_mmx.o +MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp_mmx.o \ x86/vp3dsp_sse2.o diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index cb9d38100c..8535c6d665 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -33,7 +33,6 @@ #include "faandct.h" #include "faanidct.h" #include "mathops.h" -#include "snow.h" #include "mpegvideo.h" #include "config.h" #include "lpc.h" @@ -329,102 +328,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) return s; } - -#if CONFIG_SNOW_ENCODER //dwt is in snow.c -static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ - int s, i, j; - const int dec_count= w==8 ? 3 : 4; - int tmp[32*32]; - int level, ori; - static const int scale[2][2][4][4]={ - { - { - // 9/7 8x8 dec=3 - {268, 239, 239, 213}, - { 0, 224, 224, 152}, - { 0, 135, 135, 110}, - },{ - // 9/7 16x16 or 32x32 dec=4 - {344, 310, 310, 280}, - { 0, 320, 320, 228}, - { 0, 175, 175, 136}, - { 0, 129, 129, 102}, - } - },{ - { - // 5/3 8x8 dec=3 - {275, 245, 245, 218}, - { 0, 230, 230, 156}, - { 0, 138, 138, 113}, - },{ - // 5/3 16x16 or 32x32 dec=4 - {352, 317, 317, 286}, - { 0, 328, 328, 233}, - { 0, 180, 180, 140}, - { 0, 132, 132, 105}, - } - } - }; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j+=4) { - tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; - tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; - tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; - tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; - } - pix1 += line_size; - pix2 += line_size; - } - - ff_spatial_dwt(tmp, w, h, 32, type, dec_count); - - s=0; - assert(w==h); - for(level=0; level>(dec_count-level); - int sx= (ori&1) ? size : 0; - int stride= 32<<(dec_count-level); - int sy= (ori&2) ? stride>>1 : 0; - - for(i=0; i=0); - return s>>9; -} - -static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 8, h, 1); -} - -static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 8, h, 0); -} - -static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 16, h, 1); -} - -static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 16, h, 0); -} - -int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 32, h, 1); -} - -int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 32, h, 0); -} -#endif - /* draw the edges of width 'w' of an image of size width, height */ //FIXME check that this is ok for mpeg4 interlaced static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w) @@ -3531,7 +3434,7 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ case FF_CMP_NSSE: cmp[i]= c->nsse[i]; break; -#if CONFIG_SNOW_ENCODER +#if CONFIG_DWT case FF_CMP_W53: cmp[i]= c->w53[i]; break; @@ -4816,11 +4719,8 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vsse[5]= vsse_intra8_c; c->nsse[0]= nsse16_c; c->nsse[1]= nsse8_c; -#if CONFIG_SNOW_ENCODER - c->w53[0]= w53_16_c; - c->w53[1]= w53_8_c; - c->w97[0]= w97_16_c; - c->w97[1]= w97_8_c; +#if CONFIG_DWT + ff_dsputil_init_dwt(c); #endif c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; @@ -4865,12 +4765,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->try_8x8basis= try_8x8basis_c; c->add_8x8basis= add_8x8basis_c; -#if CONFIG_SNOW_DECODER - c->vertical_compose97i = ff_snow_vertical_compose97i; - c->horizontal_compose97i = ff_snow_horizontal_compose97i; - c->inner_add_yblock = ff_snow_inner_add_yblock; -#endif - #if CONFIG_VORBIS_DECODER c->vorbis_inverse_coupling = vorbis_inverse_coupling; #endif diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 97c08fe5d2..5a2246cbee 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -37,8 +37,6 @@ //#define DEBUG /* dct code */ typedef short DCTELEM; -typedef int DWTELEM; -typedef short IDWTELEM; void fdct_ifast (DCTELEM *data); void fdct_ifast248 (DCTELEM *data); @@ -185,10 +183,6 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ // although currently h<4 is not used as functions with width <8 are neither used nor implemented typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; - -// for snow slices -typedef struct slice_buffer_s slice_buffer; - /** * Scantable. */ @@ -538,11 +532,6 @@ typedef struct DSPContext { void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - /* snow wavelet */ - void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); - void (*horizontal_compose97i)(IDWTELEM *b, int width); - void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); - void (*prefetch)(void *mem, int stride, int h); void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); @@ -681,6 +670,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); +void ff_dsputil_init_dwt(DSPContext *c); void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/dwt.c b/libavcodec/dwt.c new file mode 100644 index 0000000000..28b3036337 --- /dev/null +++ b/libavcodec/dwt.c @@ -0,0 +1,843 @@ +/* + * Copyright (C) 2004-2010 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "dsputil.h" +#include "dwt.h" + +void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) +{ + int i; + + buf->base_buffer = base_buffer; + buf->line_count = line_count; + buf->line_width = line_width; + buf->data_count = max_allocated_lines; + buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); + buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); + + for(i = 0; i < max_allocated_lines; i++){ + buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); + } + + buf->data_stack_top = max_allocated_lines - 1; +} + +IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) +{ + IDWTELEM * buffer; + + assert(buf->data_stack_top >= 0); +// assert(!buf->line[line]); + if (buf->line[line]) + return buf->line[line]; + + buffer = buf->data_stack[buf->data_stack_top]; + buf->data_stack_top--; + buf->line[line] = buffer; + + return buffer; +} + +void slice_buffer_release(slice_buffer * buf, int line) +{ + IDWTELEM * buffer; + + assert(line >= 0 && line < buf->line_count); + assert(buf->line[line]); + + buffer = buf->line[line]; + buf->data_stack_top++; + buf->data_stack[buf->data_stack_top] = buffer; + buf->line[line] = NULL; +} + +void slice_buffer_flush(slice_buffer * buf) +{ + int i; + for(i = 0; i < buf->line_count; i++){ + if (buf->line[i]) + slice_buffer_release(buf, i); + } +} + +void slice_buffer_destroy(slice_buffer * buf) +{ + int i; + slice_buffer_flush(buf); + + for(i = buf->data_count - 1; i >= 0; i--){ + av_freep(&buf->data_stack[i]); + } + av_freep(&buf->data_stack); + av_freep(&buf->line); +} + +static inline int mirror(int v, int m){ + while((unsigned)v > (unsigned)m){ + v=-v; + if(v<0) v+= 2*m; + } + return v; +} + +static av_always_inline void +lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, + int dst_step, int src_step, int ref_step, + int width, int mul, int add, int shift, + int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + +#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) + if(mirror_left){ + dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i>shift), + inverse); + } + + if(mirror_right){ + dst[w*dst_step] = + LIFT(src[w*src_step], + ((mul*2*ref[w*ref_step]+add)>>shift), + inverse); + } +} + +static av_always_inline void +inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, + int dst_step, int src_step, int ref_step, + int width, int mul, int add, int shift, + int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + +#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) + if(mirror_left){ + dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i>shift), + inverse); + } + + if(mirror_right){ + dst[w*dst_step] = + LIFT(src[w*src_step], + ((mul*2*ref[w*ref_step]+add)>>shift), + inverse); + } +} + +#ifndef liftS +static av_always_inline void +liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, + int dst_step, int src_step, int ref_step, + int width, int mul, int add, int shift, + int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + + assert(shift == 4); +#define LIFTS(src, ref, inv) \ + ((inv) ? \ + (src) + (((ref) + 4*(src))>>shift): \ + -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) + if(mirror_left){ + dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i>1) - 1 + (highpass & width); + int i; + + assert(shift == 4); +#define LIFTS(src, ref, inv) \ + ((inv) ? \ + (src) + (((ref) + 4*(src))>>shift): \ + -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) + if(mirror_left){ + dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i>1; + int x; + const int w2= (width+1)>>1; + + for(x=0; x>1; + A4 += (A1 + 1)>>1; + b[0+width2] = A1; + b[0 ] = A4; + for(x=1; x+1>1; + A2 += (A1 + A3 + 2)>>2; + b[x+width2] = A3; + b[x ] = A2; + + A1= temp[x+1+width2]; + A2= temp[x+2 ]; + A1 -= (A2 + A4)>>1; + A4 += (A1 + A3 + 2)>>2; + b[x+1+width2] = A1; + b[x+1 ] = A4; + } + A3= temp[width-1]; + A3 -= A2; + A2 += (A1 + A3 + 2)>>2; + b[width -1] = A3; + b[width2-1] = A2; + } +#else + lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); + lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); +#endif /* 0 */ +} + +static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>1; + } +} + +static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>2; + } +} + +static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ + int y; + DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; + DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; + + for(y=-2; y>1; + + lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); + liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); + lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); + lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); +} + + +static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>W_AS; + } +} + +static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>W_CS; + } +} + +static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>W_BS; +#else + b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); +#endif + } +} + +static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i>W_DS; + } +} + +static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ + int y; + DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; + DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; + DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; + DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; + + for(y=-4; y>level, height>>level, stride<>level, height>>level, stride<>1; + const int w2= (width+1)>>1; + int x; + + for(x=0; x>1); + for(x=2; x>2); + b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1); + } + if(width&1){ + b[x ] = temp[x ] - ((temp[x-1]+1)>>1); + b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1); + }else + b[x-1] = temp[x-1] + b[x-2]; +} + +static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>1; + } +} + +static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>2; + } +} + +static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ + cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); + cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); + cs->y = -1; +} + +static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ + cs->b0 = buffer + mirror(-1-1, height-1)*stride; + cs->b1 = buffer + mirror(-1 , height-1)*stride; + cs->y = -1; +} + +static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ + int y= cs->y; + + IDWTELEM *b0= cs->b0; + IDWTELEM *b1= cs->b1; + IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); + IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); + + if(y+1<(unsigned)height && y<(unsigned)height){ + int x; + + for(x=0; x>2; + b1[x] += (b0[x] + b2[x])>>1; + } + }else{ + if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); + if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); + } + + if(y-1<(unsigned)height) horizontal_compose53i(b0, width); + if(y+0<(unsigned)height) horizontal_compose53i(b1, width); + + cs->b0 = b2; + cs->b1 = b3; + cs->y += 2; +} + +static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ + int y= cs->y; + IDWTELEM *b0= cs->b0; + IDWTELEM *b1= cs->b1; + IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; + IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; + + if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); + if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); + + if(y-1<(unsigned)height) horizontal_compose53i(b0, width); + if(y+0<(unsigned)height) horizontal_compose53i(b1, width); + + cs->b0 = b2; + cs->b1 = b3; + cs->y += 2; +} + +static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ + DWTCompose cs; + spatial_compose53i_init(&cs, buffer, height, stride); + while(cs.y <= height) + spatial_compose53i_dy(&cs, buffer, width, height, stride); +} + + +void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ + IDWTELEM temp[width]; + const int w2= (width+1)>>1; + +#if 0 //maybe more understadable but slower + inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); + inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1); + + inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1); + inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0); +#else + int x; + temp[0] = b[0] - ((3*b[w2]+2)>>2); + for(x=1; x<(width>>1); x++){ + temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3); + temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; + } + if(width&1){ + temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2); + temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; + }else + temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2]; + + b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3); + for(x=2; x>4); + b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); + } + if(width&1){ + b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3); + b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); + }else + b[x-1] = temp[x-1] + 3*b [x-2]; +#endif +} + +static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>W_AS; + } +} + +static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>W_CS; + } +} + +static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>W_BS; +#else + b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; +#endif + } +} + +static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ + int i; + + for(i=0; i>W_DS; + } +} + +void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ + int i; + + for(i=0; i>W_DS; + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; +#ifdef liftS + b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; +#else + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; +#endif + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ + cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); + cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); + cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); + cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); + cs->y = -3; +} + +static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ + cs->b0 = buffer + mirror(-3-1, height-1)*stride; + cs->b1 = buffer + mirror(-3 , height-1)*stride; + cs->b2 = buffer + mirror(-3+1, height-1)*stride; + cs->b3 = buffer + mirror(-3+2, height-1)*stride; + cs->y = -3; +} + +static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ + int y = cs->y; + + IDWTELEM *b0= cs->b0; + IDWTELEM *b1= cs->b1; + IDWTELEM *b2= cs->b2; + IDWTELEM *b3= cs->b3; + IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); + IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); + + if(y>0 && y+4vertical_compose97i(b0, b1, b2, b3, b4, b5, width); + }else{ + if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); + if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); + if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); + if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); + } + + if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); + if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); + + cs->b0=b2; + cs->b1=b3; + cs->b2=b4; + cs->b3=b5; + cs->y += 2; +} + +static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ + int y = cs->y; + IDWTELEM *b0= cs->b0; + IDWTELEM *b1= cs->b1; + IDWTELEM *b2= cs->b2; + IDWTELEM *b3= cs->b3; + IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; + IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; + + if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); + if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); + if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); + if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); + + if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); + if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); + + cs->b0=b2; + cs->b1=b3; + cs->b2=b4; + cs->b3=b5; + cs->y += 2; +} + +static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ + DWTCompose cs; + spatial_compose97i_init(&cs, buffer, height, stride); + while(cs.y <= height) + spatial_compose97i_dy(&cs, buffer, width, height, stride); +} + +void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ + int level; + for(level=decomposition_count-1; level>=0; level--){ + switch(type){ + case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<>level, stride_line<=0; level--){ + while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ + switch(type){ + case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<>level, height>>level, stride_line<=0; level--){ + switch(type){ + case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<>level, stride<=0; level--){ + while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ + switch(type){ + case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<>level, height>>level, stride<>(dec_count-level); + int sx= (ori&1) ? size : 0; + int stride= 32<<(dec_count-level); + int sy= (ori&2) ? stride>>1 : 0; + + for(i=0; i=0); + return s>>9; +} + +static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 1); +} + +static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 0); +} + +static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 1); +} + +static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 0); +} + +int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 32, h, 1); +} + +int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 32, h, 0); +} + +void ff_dsputil_init_dwt(DSPContext *c) +{ + c->w53[0]= w53_16_c; + c->w53[1]= w53_8_c; + c->w97[0]= w97_16_c; + c->w97[1]= w97_8_c; +} + +void ff_dwt_init(DWTContext *c) +{ + c->vertical_compose97i = ff_snow_vertical_compose97i; + c->horizontal_compose97i = ff_snow_horizontal_compose97i; + c->inner_add_yblock = ff_snow_inner_add_yblock; + + if (ARCH_X86) ff_dwt_init_x86(c); +} diff --git a/libavcodec/dwt.h b/libavcodec/dwt.h new file mode 100644 index 0000000000..d9a8b16135 --- /dev/null +++ b/libavcodec/dwt.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2004-2010 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DWT_H +#define AVCODEC_DWT_H + +#include + +typedef int DWTELEM; +typedef short IDWTELEM; + +typedef struct { + IDWTELEM *b0; + IDWTELEM *b1; + IDWTELEM *b2; + IDWTELEM *b3; + int y; +} DWTCompose; + +/** Used to minimize the amount of memory used in order to optimize cache performance. **/ +typedef struct slice_buffer_s { + IDWTELEM * * line; ///< For use by idwt and predict_slices. + IDWTELEM * * data_stack; ///< Used for internal purposes. + int data_stack_top; + int line_count; + int line_width; + int data_count; + IDWTELEM * base_buffer; ///< Buffer that this structure is caching. +} slice_buffer; + +typedef struct DWTContext { + void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); + void (*horizontal_compose97i)(IDWTELEM *b, int width); + void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); +} DWTContext; + +#define MAX_DECOMPOSITIONS 8 + +#define DWT_97 0 +#define DWT_53 1 + +#define liftS lift +#if 1 +#define W_AM 3 +#define W_AO 0 +#define W_AS 1 + +#undef liftS +#define W_BM 1 +#define W_BO 8 +#define W_BS 4 + +#define W_CM 1 +#define W_CO 0 +#define W_CS 0 + +#define W_DM 3 +#define W_DO 4 +#define W_DS 3 +#elif 0 +#define W_AM 55 +#define W_AO 16 +#define W_AS 5 + +#define W_BM 3 +#define W_BO 32 +#define W_BS 6 + +#define W_CM 127 +#define W_CO 64 +#define W_CS 7 + +#define W_DM 7 +#define W_DO 8 +#define W_DS 4 +#elif 0 +#define W_AM 97 +#define W_AO 32 +#define W_AS 6 + +#define W_BM 63 +#define W_BO 512 +#define W_BS 10 + +#define W_CM 13 +#define W_CO 8 +#define W_CS 4 + +#define W_DM 15 +#define W_DO 16 +#define W_DS 5 + +#else + +#define W_AM 203 +#define W_AO 64 +#define W_AS 7 + +#define W_BM 217 +#define W_BO 2048 +#define W_BS 12 + +#define W_CM 113 +#define W_CO 64 +#define W_CS 7 + +#define W_DM 227 +#define W_DO 128 +#define W_DS 9 +#endif + +#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) +//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) + +void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer); +void slice_buffer_release(slice_buffer * buf, int line); +void slice_buffer_flush(slice_buffer * buf); +void slice_buffer_destroy(slice_buffer * buf); +IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line); + +void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); +void ff_snow_horizontal_compose97i(IDWTELEM *b, int width); +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); + +int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h); +int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h); + +void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); + +void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count); +void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y); +void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count); +void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y); +void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count); + +void ff_dwt_init(DWTContext *c); +void ff_dwt_init_x86(DWTContext *c); + +#endif /* AVCODEC_DWT_H */ diff --git a/libavcodec/ivi_dsp.c b/libavcodec/ivi_dsp.c index f18f378aef..5aa8f94c99 100644 --- a/libavcodec/ivi_dsp.c +++ b/libavcodec/ivi_dsp.c @@ -28,6 +28,7 @@ #include "avcodec.h" #include "dsputil.h" +#include "dwt.h" #include "ivi_common.h" #include "ivi_dsp.h" diff --git a/libavcodec/snow.c b/libavcodec/snow.c index d51c998097..a4758b69eb 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -21,6 +21,7 @@ #include "libavutil/intmath.h" #include "avcodec.h" #include "dsputil.h" +#include "dwt.h" #include "snow.h" #include "rangecoder.h" @@ -440,6 +441,7 @@ typedef struct SnowContext{ AVCodecContext *avctx; RangeCoder c; DSPContext dsp; + DWTContext dwt; AVFrame new_picture; AVFrame input_picture; ///< new_picture with the internal linesizes AVFrame current_picture; @@ -494,85 +496,6 @@ typedef struct SnowContext{ uint8_t *scratchbuf; }SnowContext; -typedef struct { - IDWTELEM *b0; - IDWTELEM *b1; - IDWTELEM *b2; - IDWTELEM *b3; - int y; -} DWTCompose; - -#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) -//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) - -static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) -{ - int i; - - buf->base_buffer = base_buffer; - buf->line_count = line_count; - buf->line_width = line_width; - buf->data_count = max_allocated_lines; - buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); - buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); - - for(i = 0; i < max_allocated_lines; i++){ - buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); - } - - buf->data_stack_top = max_allocated_lines - 1; -} - -static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) -{ - IDWTELEM * buffer; - - assert(buf->data_stack_top >= 0); -// assert(!buf->line[line]); - if (buf->line[line]) - return buf->line[line]; - - buffer = buf->data_stack[buf->data_stack_top]; - buf->data_stack_top--; - buf->line[line] = buffer; - - return buffer; -} - -static void slice_buffer_release(slice_buffer * buf, int line) -{ - IDWTELEM * buffer; - - assert(line >= 0 && line < buf->line_count); - assert(buf->line[line]); - - buffer = buf->line[line]; - buf->data_stack_top++; - buf->data_stack[buf->data_stack_top] = buffer; - buf->line[line] = NULL; -} - -static void slice_buffer_flush(slice_buffer * buf) -{ - int i; - for(i = 0; i < buf->line_count; i++){ - if (buf->line[i]) - slice_buffer_release(buf, i); - } -} - -static void slice_buffer_destroy(slice_buffer * buf) -{ - int i; - slice_buffer_flush(buf); - - for(i = buf->data_count - 1; i >= 0; i--){ - av_freep(&buf->data_stack[i]); - } - av_freep(&buf->data_stack); - av_freep(&buf->line); -} - #ifdef __sgi // Avoid a name clash on SGI IRIX #undef qexp @@ -580,14 +503,6 @@ static void slice_buffer_destroy(slice_buffer * buf) #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 static uint8_t qexp[QROOT]; -static inline int mirror(int v, int m){ - while((unsigned)v > (unsigned)m){ - v=-v; - if(v<0) v+= 2*m; - } - return v; -} - static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ int i; @@ -709,605 +624,6 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ return v; } -static av_always_inline void -lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, - int dst_step, int src_step, int ref_step, - int width, int mul, int add, int shift, - int highpass, int inverse){ - const int mirror_left= !highpass; - const int mirror_right= (width&1) ^ highpass; - const int w= (width>>1) - 1 + (highpass & width); - int i; - -#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) - if(mirror_left){ - dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); - dst += dst_step; - src += src_step; - } - - for(i=0; i>shift), - inverse); - } - - if(mirror_right){ - dst[w*dst_step] = - LIFT(src[w*src_step], - ((mul*2*ref[w*ref_step]+add)>>shift), - inverse); - } -} - -static av_always_inline void -inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, - int dst_step, int src_step, int ref_step, - int width, int mul, int add, int shift, - int highpass, int inverse){ - const int mirror_left= !highpass; - const int mirror_right= (width&1) ^ highpass; - const int w= (width>>1) - 1 + (highpass & width); - int i; - -#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) - if(mirror_left){ - dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); - dst += dst_step; - src += src_step; - } - - for(i=0; i>shift), - inverse); - } - - if(mirror_right){ - dst[w*dst_step] = - LIFT(src[w*src_step], - ((mul*2*ref[w*ref_step]+add)>>shift), - inverse); - } -} - -#ifndef liftS -static av_always_inline void -liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, - int dst_step, int src_step, int ref_step, - int width, int mul, int add, int shift, - int highpass, int inverse){ - const int mirror_left= !highpass; - const int mirror_right= (width&1) ^ highpass; - const int w= (width>>1) - 1 + (highpass & width); - int i; - - assert(shift == 4); -#define LIFTS(src, ref, inv) \ - ((inv) ? \ - (src) + (((ref) + 4*(src))>>shift): \ - -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) - if(mirror_left){ - dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); - dst += dst_step; - src += src_step; - } - - for(i=0; i>1) - 1 + (highpass & width); - int i; - - assert(shift == 4); -#define LIFTS(src, ref, inv) \ - ((inv) ? \ - (src) + (((ref) + 4*(src))>>shift): \ - -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) - if(mirror_left){ - dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); - dst += dst_step; - src += src_step; - } - - for(i=0; i>1; - int x; - const int w2= (width+1)>>1; - - for(x=0; x>1; - A4 += (A1 + 1)>>1; - b[0+width2] = A1; - b[0 ] = A4; - for(x=1; x+1>1; - A2 += (A1 + A3 + 2)>>2; - b[x+width2] = A3; - b[x ] = A2; - - A1= temp[x+1+width2]; - A2= temp[x+2 ]; - A1 -= (A2 + A4)>>1; - A4 += (A1 + A3 + 2)>>2; - b[x+1+width2] = A1; - b[x+1 ] = A4; - } - A3= temp[width-1]; - A3 -= A2; - A2 += (A1 + A3 + 2)>>2; - b[width -1] = A3; - b[width2-1] = A2; - } -#else - lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); - lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); -#endif /* 0 */ -} - -static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>1; - } -} - -static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>2; - } -} - -static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ - int y; - DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; - DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; - - for(y=-2; y>1; - - lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); - liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); - lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); - lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); -} - - -static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>W_AS; - } -} - -static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>W_CS; - } -} - -static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>W_BS; -#else - b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); -#endif - } -} - -static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ - int i; - - for(i=0; i>W_DS; - } -} - -static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ - int y; - DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; - DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; - DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; - DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; - - for(y=-4; y>level, height>>level, stride<>level, height>>level, stride<>1; - const int w2= (width+1)>>1; - int x; - - for(x=0; x>1); - for(x=2; x>2); - b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1); - } - if(width&1){ - b[x ] = temp[x ] - ((temp[x-1]+1)>>1); - b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1); - }else - b[x-1] = temp[x-1] + b[x-2]; -} - -static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>1; - } -} - -static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>2; - } -} - -static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ - cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); - cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); - cs->y = -1; -} - -static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ - cs->b0 = buffer + mirror(-1-1, height-1)*stride; - cs->b1 = buffer + mirror(-1 , height-1)*stride; - cs->y = -1; -} - -static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ - int y= cs->y; - - IDWTELEM *b0= cs->b0; - IDWTELEM *b1= cs->b1; - IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); - IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); - - if(y+1<(unsigned)height && y<(unsigned)height){ - int x; - - for(x=0; x>2; - b1[x] += (b0[x] + b2[x])>>1; - } - }else{ - if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); - if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); - } - - if(y-1<(unsigned)height) horizontal_compose53i(b0, width); - if(y+0<(unsigned)height) horizontal_compose53i(b1, width); - - cs->b0 = b2; - cs->b1 = b3; - cs->y += 2; -} - -static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ - int y= cs->y; - IDWTELEM *b0= cs->b0; - IDWTELEM *b1= cs->b1; - IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; - IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; - - if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); - if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); - - if(y-1<(unsigned)height) horizontal_compose53i(b0, width); - if(y+0<(unsigned)height) horizontal_compose53i(b1, width); - - cs->b0 = b2; - cs->b1 = b3; - cs->y += 2; -} - -static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ - DWTCompose cs; - spatial_compose53i_init(&cs, buffer, height, stride); - while(cs.y <= height) - spatial_compose53i_dy(&cs, buffer, width, height, stride); -} - - -void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ - IDWTELEM temp[width]; - const int w2= (width+1)>>1; - -#if 0 //maybe more understadable but slower - inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); - inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1); - - inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1); - inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0); -#else - int x; - temp[0] = b[0] - ((3*b[w2]+2)>>2); - for(x=1; x<(width>>1); x++){ - temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3); - temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; - } - if(width&1){ - temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2); - temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; - }else - temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2]; - - b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3); - for(x=2; x>4); - b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); - } - if(width&1){ - b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3); - b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); - }else - b[x-1] = temp[x-1] + 3*b [x-2]; -#endif -} - -static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>W_AS; - } -} - -static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>W_CS; - } -} - -static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>W_BS; -#else - b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; -#endif - } -} - -static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ - int i; - - for(i=0; i>W_DS; - } -} - -void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ - int i; - - for(i=0; i>W_DS; - b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; -#ifdef liftS - b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; -#else - b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; -#endif - b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; - } -} - -static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ - cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); - cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); - cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); - cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); - cs->y = -3; -} - -static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ - cs->b0 = buffer + mirror(-3-1, height-1)*stride; - cs->b1 = buffer + mirror(-3 , height-1)*stride; - cs->b2 = buffer + mirror(-3+1, height-1)*stride; - cs->b3 = buffer + mirror(-3+2, height-1)*stride; - cs->y = -3; -} - -static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ - int y = cs->y; - - IDWTELEM *b0= cs->b0; - IDWTELEM *b1= cs->b1; - IDWTELEM *b2= cs->b2; - IDWTELEM *b3= cs->b3; - IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); - IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); - - if(y>0 && y+4vertical_compose97i(b0, b1, b2, b3, b4, b5, width); - }else{ - if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); - if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); - if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); - if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); - } - - if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); - if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); - - cs->b0=b2; - cs->b1=b3; - cs->b2=b4; - cs->b3=b5; - cs->y += 2; -} - -static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ - int y = cs->y; - IDWTELEM *b0= cs->b0; - IDWTELEM *b1= cs->b1; - IDWTELEM *b2= cs->b2; - IDWTELEM *b3= cs->b3; - IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; - IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; - - if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); - if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); - if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); - if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); - - if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); - if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); - - cs->b0=b2; - cs->b1=b3; - cs->b2=b4; - cs->b3=b5; - cs->y += 2; -} - -static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ - DWTCompose cs; - spatial_compose97i_init(&cs, buffer, height, stride); - while(cs.y <= height) - spatial_compose97i_dy(&cs, buffer, width, height, stride); -} - -static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ - int level; - for(level=decomposition_count-1; level>=0; level--){ - switch(type){ - case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<>level, stride_line<=0; level--){ - while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ - switch(type){ - case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<>level, height>>level, stride_line<width; const int h= b->height; @@ -2043,7 +1359,7 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer } #else if(sliced){ - s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); }else{ for(y=0; ymax_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe dsputil_init(&s->dsp, avctx); + ff_dwt_init(&s->dwt); #define mcf(dx,dy)\ s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ @@ -2865,7 +2182,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac } for(; yddsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); + ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); } if(s->qlog == LOSSLESS_QLOG){ @@ -3607,41 +2924,6 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ return distortion + rate*penalty_factor; } -static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ - int level; - for(level=decomposition_count-1; level>=0; level--){ - switch(type){ - case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<>level, stride<=0; level--){ - while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ - switch(type){ - case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<>level, height>>level, stride<width; const int h= b->height; diff --git a/libavcodec/snow.h b/libavcodec/snow.h index ee0d0730d0..7d847e4b37 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -23,10 +23,10 @@ #define AVCODEC_SNOW_H #include "dsputil.h" +#include "dwt.h" #define MID_STATE 128 -#define MAX_DECOMPOSITIONS 8 #define MAX_PLANES 4 #define QSHIFT 5 #define QROOT (1<horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; -#if HAVE_7REGS - c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; -#endif - c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; - } - else{ - if(mm_flags & FF_MM_MMX2){ - c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; -#if HAVE_7REGS - c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; -#endif - } - c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; - } -#endif - if(mm_flags & FF_MM_3DNOW){ c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; c->vector_fmul = vector_fmul_3dnow; diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 42d9c12c48..7d1bf7fae6 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -167,15 +167,6 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); -void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width); -void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width); -void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); -void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); -void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); -void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); - void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, double *autoc); diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c index 1e1d2e22d8..263f0bbf69 100644 --- a/libavcodec/x86/snowdsp_mmx.c +++ b/libavcodec/x86/snowdsp_mmx.c @@ -22,9 +22,10 @@ #include "libavutil/x86_cpu.h" #include "libavcodec/avcodec.h" #include "libavcodec/snow.h" +#include "libavcodec/dwt.h" #include "dsputil_mmx.h" -void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ +static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ const int w2= (width+1)>>1; DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; const int w_l= (width>>1); @@ -213,7 +214,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ } } -void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ +static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ const int w2= (width+1)>>1; IDWTELEM temp[width >> 1]; const int w_l= (width>>1); @@ -436,7 +437,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ "movdqa %%"s2", %%"t2" \n\t"\ "movdqa %%"s3", %%"t3" \n\t" -void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ +static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ x86_reg i = width; while(i & 0x1F) @@ -534,7 +535,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, "movq %%"s3", %%"t3" \n\t" -void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ +static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ x86_reg i = width; while(i & 15) { @@ -847,7 +848,7 @@ snow_inner_add_yblock_mmx_mix("16", "8") snow_inner_add_yblock_mmx_end("32") } -void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ if (b_w == 16) @@ -861,7 +862,7 @@ void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); } -void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ if (b_w == 16) inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); @@ -870,3 +871,27 @@ void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui else ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); } + +void ff_dwt_init_x86(DWTContext *c) +{ + mm_flags = mm_support(); + + if (mm_flags & FF_MM_MMX) { + if(mm_flags & FF_MM_SSE2 & 0){ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; +#if HAVE_7REGS + c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; +#endif + c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; + } + else{ + if(mm_flags & FF_MM_MMX2){ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; +#if HAVE_7REGS + c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; +#endif + } + c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; + } + } +}