From 7a249625fd63d11bc85a61a42dbc142841cf85b7 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 15 Jul 2012 22:12:30 +0200 Subject: [PATCH 1/9] os_support: K&R formatting cosmetics --- libavformat/os_support.c | 120 +++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 49 deletions(-) diff --git a/libavformat/os_support.c b/libavformat/os_support.c index 9992a631c5..7618708283 100644 --- a/libavformat/os_support.c +++ b/libavformat/os_support.c @@ -75,21 +75,22 @@ int ff_win32_open(const char *filename_utf8, int oflag, int pmode) #if !HAVE_INET_ATON #include -int ff_inet_aton (const char * str, struct in_addr * add) +int ff_inet_aton(const char *str, struct in_addr *add) { unsigned int add1 = 0, add2 = 0, add3 = 0, add4 = 0; if (sscanf(str, "%d.%d.%d.%d", &add1, &add2, &add3, &add4) != 4) return 0; - if (!add1 || (add1|add2|add3|add4) > 255) return 0; + if (!add1 || (add1 | add2 | add3 | add4) > 255) + return 0; add->s_addr = htonl((add1 << 24) + (add2 << 16) + (add3 << 8) + add4); return 1; } #else -int ff_inet_aton (const char * str, struct in_addr * add) +int ff_inet_aton(const char *str, struct in_addr *add) { return inet_aton(str, add); } @@ -97,7 +98,7 @@ int ff_inet_aton (const char * str, struct in_addr * add) #if !HAVE_GETADDRINFO int ff_getaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, struct addrinfo **res) + const struct addrinfo *hints, struct addrinfo **res) { struct hostent *h = NULL; struct addrinfo *ai; @@ -114,7 +115,7 @@ int ff_getaddrinfo(const char *node, const char *service, #endif *res = NULL; - sin = av_mallocz(sizeof(struct sockaddr_in)); + sin = av_mallocz(sizeof(struct sockaddr_in)); if (!sin) return EAI_FAIL; sin->sin_family = AF_INET; @@ -133,9 +134,9 @@ int ff_getaddrinfo(const char *node, const char *service, memcpy(&sin->sin_addr, h->h_addr_list[0], sizeof(struct in_addr)); } } else { - if (hints && (hints->ai_flags & AI_PASSIVE)) { + if (hints && (hints->ai_flags & 
AI_PASSIVE)) sin->sin_addr.s_addr = INADDR_ANY; - } else + else sin->sin_addr.s_addr = INADDR_LOOPBACK; } @@ -150,16 +151,22 @@ int ff_getaddrinfo(const char *node, const char *service, return EAI_FAIL; } - *res = ai; - ai->ai_family = AF_INET; + *res = ai; + ai->ai_family = AF_INET; ai->ai_socktype = hints ? hints->ai_socktype : 0; switch (ai->ai_socktype) { - case SOCK_STREAM: ai->ai_protocol = IPPROTO_TCP; break; - case SOCK_DGRAM: ai->ai_protocol = IPPROTO_UDP; break; - default: ai->ai_protocol = 0; break; + case SOCK_STREAM: + ai->ai_protocol = IPPROTO_TCP; + break; + case SOCK_DGRAM: + ai->ai_protocol = IPPROTO_UDP; + break; + default: + ai->ai_protocol = 0; + break; } - ai->ai_addr = (struct sockaddr *)sin; + ai->ai_addr = (struct sockaddr *)sin; ai->ai_addrlen = sizeof(struct sockaddr_in); if (hints && (hints->ai_flags & AI_CANONNAME)) ai->ai_canonname = h ? av_strdup(h->h_name) : NULL; @@ -222,7 +229,7 @@ int ff_getnameinfo(const struct sockaddr *sa, int salen, a = ntohl(sin->sin_addr.s_addr); snprintf(host, hostlen, "%d.%d.%d.%d", ((a >> 24) & 0xff), ((a >> 16) & 0xff), - ((a >> 8) & 0xff), ( a & 0xff)); + ((a >> 8) & 0xff), (a & 0xff)); } } @@ -231,9 +238,9 @@ int ff_getnameinfo(const struct sockaddr *sa, int salen, if (!(flags & NI_NUMERICSERV)) ent = getservbyport(sin->sin_port, flags & NI_DGRAM ? 
"udp" : "tcp"); - if (ent) { + if (ent) snprintf(serv, servlen, "%s", ent->s_name); - } else + else snprintf(serv, servlen, "%d", ntohs(sin->sin_port)); } @@ -244,18 +251,28 @@ int ff_getnameinfo(const struct sockaddr *sa, int salen, #if !HAVE_GETADDRINFO || HAVE_WINSOCK2_H const char *ff_gai_strerror(int ecode) { - switch(ecode) { - case EAI_AGAIN : return "Temporary failure in name resolution"; - case EAI_BADFLAGS: return "Invalid flags for ai_flags"; - case EAI_FAIL : return "A non-recoverable error occurred"; - case EAI_FAMILY : return "The address family was not recognized or the address length was invalid for the specified family"; - case EAI_MEMORY : return "Memory allocation failure"; + switch (ecode) { + case EAI_AGAIN: + return "Temporary failure in name resolution"; + case EAI_BADFLAGS: + return "Invalid flags for ai_flags"; + case EAI_FAIL: + return "A non-recoverable error occurred"; + case EAI_FAMILY: + return "The address family was not recognized or the address " + "length was invalid for the specified family"; + case EAI_MEMORY: + return "Memory allocation failure"; #if EAI_NODATA != EAI_NONAME - case EAI_NODATA : return "No address associated with hostname"; + case EAI_NODATA: + return "No address associated with hostname"; #endif - case EAI_NONAME : return "The name does not resolve for the supplied parameters"; - case EAI_SERVICE: return "servname not supported for ai_socktype"; - case EAI_SOCKTYPE: return "ai_socktype not supported"; + case EAI_NONAME: + return "The name does not resolve for the supplied parameters"; + case EAI_SERVICE: + return "servname not supported for ai_socktype"; + case EAI_SOCKTYPE: + return "ai_socktype not supported"; } return "Unknown error"; @@ -265,13 +282,13 @@ const char *ff_gai_strerror(int ecode) int ff_socket_nonblock(int socket, int enable) { #if HAVE_WINSOCK2_H - u_long param = enable; - return ioctlsocket(socket, FIONBIO, &param); + u_long param = enable; + return ioctlsocket(socket, FIONBIO, &param); #else - if 
(enable) - return fcntl(socket, F_SETFL, fcntl(socket, F_GETFL) | O_NONBLOCK); - else - return fcntl(socket, F_SETFL, fcntl(socket, F_GETFL) & ~O_NONBLOCK); + if (enable) + return fcntl(socket, F_SETFL, fcntl(socket, F_GETFL) | O_NONBLOCK); + else + return fcntl(socket, F_SETFL, fcntl(socket, F_GETFL) & ~O_NONBLOCK); #endif } @@ -297,7 +314,7 @@ int ff_poll(struct pollfd *fds, nfds_t numfds, int timeout) FD_ZERO(&exception_set); n = 0; - for(i = 0; i < numfds; i++) { + for (i = 0; i < numfds; i++) { if (fds[i].fd < 0) continue; #if !HAVE_WINSOCK2_H @@ -307,38 +324,43 @@ int ff_poll(struct pollfd *fds, nfds_t numfds, int timeout) } #endif - if (fds[i].events & POLLIN) FD_SET(fds[i].fd, &read_set); - if (fds[i].events & POLLOUT) FD_SET(fds[i].fd, &write_set); - if (fds[i].events & POLLERR) FD_SET(fds[i].fd, &exception_set); + if (fds[i].events & POLLIN) + FD_SET(fds[i].fd, &read_set); + if (fds[i].events & POLLOUT) + FD_SET(fds[i].fd, &write_set); + if (fds[i].events & POLLERR) + FD_SET(fds[i].fd, &exception_set); if (fds[i].fd >= n) n = fds[i].fd + 1; - }; + } if (n == 0) /* Hey!? Nothing to poll, in fact!!! 
*/ return 0; - if (timeout < 0) + if (timeout < 0) { rc = select(n, &read_set, &write_set, &exception_set, NULL); - else { - struct timeval tv; - - tv.tv_sec = timeout / 1000; + } else { + struct timeval tv; + tv.tv_sec = timeout / 1000; tv.tv_usec = 1000 * (timeout % 1000); - rc = select(n, &read_set, &write_set, &exception_set, &tv); - }; + rc = select(n, &read_set, &write_set, &exception_set, &tv); + } if (rc < 0) return rc; - for(i = 0; i < numfds; i++) { + for (i = 0; i < numfds; i++) { fds[i].revents = 0; - if (FD_ISSET(fds[i].fd, &read_set)) fds[i].revents |= POLLIN; - if (FD_ISSET(fds[i].fd, &write_set)) fds[i].revents |= POLLOUT; - if (FD_ISSET(fds[i].fd, &exception_set)) fds[i].revents |= POLLERR; - }; + if (FD_ISSET(fds[i].fd, &read_set)) + fds[i].revents |= POLLIN; + if (FD_ISSET(fds[i].fd, &write_set)) + fds[i].revents |= POLLOUT; + if (FD_ISSET(fds[i].fd, &exception_set)) + fds[i].revents |= POLLERR; + } return rc; } From 47876d661a1e59ac29172c2fe6b01aa16d195313 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 16 Jul 2012 16:51:33 +0200 Subject: [PATCH 2/9] configure: cosmetics: drop some unnecessary backslashes --- configure | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 93a68aa568..74378ba7ae 100755 --- a/configure +++ b/configure @@ -2846,9 +2846,9 @@ if enabled network; then if check_header arpa/inet.h ; then check_func closesocket elif check_header winsock2.h ; then - check_func_headers winsock2.h closesocket -lws2 && \ - network_extralibs="-lws2" || \ - { check_func_headers winsock2.h closesocket -lws2_32 && \ + check_func_headers winsock2.h closesocket -lws2 && + network_extralibs="-lws2" || + { check_func_headers winsock2.h closesocket -lws2_32 && network_extralibs="-lws2_32"; } check_func_headers ws2tcpip.h getaddrinfo $network_extralibs check_type ws2tcpip.h socklen_t @@ -3102,7 +3102,7 @@ check_ldflags -Wl,--warn-common check_ldflags 
-Wl,-rpath-link=libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic -enabled xmm_clobber_test && \ +enabled xmm_clobber_test && check_ldflags -Wl,--wrap,avcodec_open2 \ -Wl,--wrap,avcodec_decode_audio4 \ -Wl,--wrap,avcodec_decode_video2 \ @@ -3110,7 +3110,7 @@ enabled xmm_clobber_test && \ -Wl,--wrap,avcodec_encode_audio2 \ -Wl,--wrap,avcodec_encode_video \ -Wl,--wrap,avcodec_encode_subtitle \ - -Wl,--wrap,sws_scale || \ + -Wl,--wrap,sws_scale || disable xmm_clobber_test echo "X{};" > $TMPV @@ -3157,8 +3157,8 @@ if enabled icc; then # icc 11.0 and 11.1 work with ebp_available, but don't pass the test enable ebp_available if enabled x86_32; then - test ${icc_version%%.*} -ge 11 && \ - check_cflags -falign-stack=maintain-16-byte || \ + test ${icc_version%%.*} -ge 11 && + check_cflags -falign-stack=maintain-16-byte || disable aligned_stack fi elif enabled ccc; then From 87246953d8424b52aeb975f22c18f9ee690751ba Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Tue, 17 Jul 2012 01:32:01 +0200 Subject: [PATCH 3/9] configure: mark libfdk-aac as nonfree --- configure | 1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index 74378ba7ae..df038889cb 100755 --- a/configure +++ b/configure @@ -2626,6 +2626,7 @@ die_license_disabled gpl libxvid die_license_disabled gpl x11grab die_license_disabled nonfree libfaac +die_license_disabled nonfree libfdk_aac die_license_disabled nonfree openssl die_license_disabled version3 libopencore_amrnb From c547e5ff98debb8def8f0343bde785791872ff0e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 16 Jul 2012 18:19:13 +0200 Subject: [PATCH 4/9] configure: clarify external library section of help output --- configure | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/configure b/configure index df038889cb..6333794cec 100755 --- a/configure +++ b/configure @@ -169,10 +169,10 @@ External library 
support: --enable-libcdio enable audio CD grabbing with libcdio --enable-libdc1394 enable IIDC-1394 grabbing using libdc1394 and libraw1394 [no] - --enable-libfaac enable FAAC support via libfaac [no] - --enable-libfdk-aac enable AAC support via libfdk-aac [no] + --enable-libfaac enable AAC encoding via libfaac [no] + --enable-libfdk-aac enable AAC encoding via libfdk-aac [no] --enable-libfreetype enable libfreetype [no] - --enable-libgsm enable GSM support via libgsm [no] + --enable-libgsm enable GSM de/encoding via libgsm [no] --enable-libilbc enable iLBC de/encoding via libilbc [no] --enable-libmp3lame enable MP3 encoding via libmp3lame [no] --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no] @@ -181,14 +181,13 @@ External library support: --enable-libopenjpeg enable JPEG 2000 de/encoding via OpenJPEG [no] --enable-libpulse enable Pulseaudio input via libpulse [no] --enable-librtmp enable RTMP[E] support via librtmp [no] - --enable-libschroedinger enable Dirac support via libschroedinger [no] - --enable-libspeex enable Speex support via libspeex [no] + --enable-libschroedinger enable Dirac de/encoding via libschroedinger [no] + --enable-libspeex enable Speex de/encoding via libspeex [no] --enable-libtheora enable Theora encoding via libtheora [no] --enable-libvo-aacenc enable AAC encoding via libvo-aacenc [no] --enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no] - --enable-libvorbis enable Vorbis encoding via libvorbis, - native implementation exists [no] - --enable-libvpx enable VP8 support via libvpx [no] + --enable-libvorbis enable Vorbis encoding via libvorbis [no] + --enable-libvpx enable VP8 de/encoding via libvpx [no] --enable-libx264 enable H.264 encoding via x264 [no] --enable-libxavs enable AVS encoding via xavs [no] --enable-libxvid enable Xvid encoding via xvidcore, From 3c6c19184c46f1d0d83f5aecb1bfaaa71d1cd367 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Tue, 17 Jul 2012 17:34:58 +0200 Subject: 
[PATCH 5/9] tscc2: fix typo in DCT --- libavcodec/tscc2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c index 972dc43fcb..91f79e249d 100644 --- a/libavcodec/tscc2.c +++ b/libavcodec/tscc2.c @@ -91,7 +91,7 @@ static av_cold int init_vlcs(TSCC2Context *c) #define DCT1D(d0, d1, d2, d3, s0, s1, s2, s3, OP) \ OP(d0, 5 * ((s0) + (s1) + (s2)) + 2 * (s3)); \ OP(d1, 5 * ((s0) - (s2) - (s3)) + 2 * (s1)); \ - OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s3)); \ + OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s1)); \ OP(d3, 5 * ((s0) - (s1) + (s2)) - 2 * (s3)); \ #define COL_OP(a, b) a = b From 4cfb0d871d4885a88061410ab8f3583018c1ac57 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Tue, 17 Jul 2012 18:12:41 +0200 Subject: [PATCH 6/9] tscc2: do not add/subtract 128 bias during DCT It turns out that the reference decoder subtracts 128 from DC during block decode but adds it back during reordering block with zigzag pattern. Transforming block with incorrect DC caused heavy visual artifacts for many quantisers. 
--- libavcodec/tscc2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c index 91f79e249d..5248c9f4ec 100644 --- a/libavcodec/tscc2.c +++ b/libavcodec/tscc2.c @@ -95,7 +95,7 @@ static av_cold int init_vlcs(TSCC2Context *c) OP(d3, 5 * ((s0) - (s1) + (s2)) - 2 * (s3)); \ #define COL_OP(a, b) a = b -#define ROW_OP(a, b) a = (((b) + 0x20) >> 6) + 0x80 +#define ROW_OP(a, b) a = ((b) + 0x20) >> 6 static void tscc2_idct4_put(int *in, int q[3], uint8_t *dst, int stride) { @@ -158,7 +158,7 @@ static int tscc2_decode_mb(TSCC2Context *c, int *q, int vlc_set, } dc = (dc + prev_dc) & 0xFF; prev_dc = dc; - c->block[0] = dc - 0x80; + c->block[0] = dc; nc = get_vlc2(gb, c->nc_vlc[vlc_set].table, 9, 1); if (nc == -1) From ab9f9876615fd856184912cf3863a80cf3a721b6 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 16 Jul 2012 12:50:14 +0100 Subject: [PATCH 7/9] build: add CONFIG_VP3DSP, reduce repetition in OBJS lists Signed-off-by: Mans Rullgard --- configure | 5 ++++- libavcodec/Makefile | 7 ++++--- libavcodec/arm/Makefile | 4 +--- libavcodec/x86/Makefile | 6 ++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 6333794cec..ca24e3ca88 100755 --- a/configure +++ b/configure @@ -1192,6 +1192,7 @@ CONFIG_EXTRA=" huffman lgplv3 lpc + vp3dsp " CMDLINE_SELECT=" @@ -1415,7 +1416,9 @@ vc1_vdpau_decoder_select="vdpau vc1_decoder" vc1image_decoder_select="vc1_decoder" vorbis_decoder_select="mdct" vorbis_encoder_select="mdct" -vp6_decoder_select="huffman" +vp3_decoder_select="vp3dsp" +vp5_decoder_select="vp3dsp" +vp6_decoder_select="huffman vp3dsp" vp6a_decoder_select="vp6_decoder" vp6f_decoder_select="vp6_decoder" vp8_decoder_select="h264pred h264qpel" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2cc02b179f..c0a27567b5 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -55,6 +55,7 @@ OBJS-$(CONFIG_SINEWIN) += sinewin.o OBJS-$(CONFIG_VAAPI) += vaapi.o 
OBJS-$(CONFIG_VDA) += vda.o OBJS-$(CONFIG_VDPAU) += vdpau.o +OBJS-$(CONFIG_VP3DSP) += vp3dsp.o # decoders/encoders/hardware accelerators OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o @@ -428,11 +429,11 @@ OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \ vorbis_data.o xiph.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ vorbis_data.o -OBJS-$(CONFIG_VP3_DECODER) += vp3.o vp3dsp.o +OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56dsp.o \ - vp3dsp.o vp56rac.o + vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ - vp3dsp.o vp6dsp.o vp56rac.o + vp6dsp.o vp56rac.o OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index d2bdd50daa..60cb2072ae 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -75,13 +75,11 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_neon.o \ arm/rv40dsp_neon.o \ arm/h264cmc_neon.o \ -NEON-OBJS-$(CONFIG_VP3_DECODER) += arm/vp3dsp_neon.o +NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ - arm/vp3dsp_neon.o \ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \ - arm/vp3dsp_neon.o \ NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \ arm/vp8dsp_neon.o diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 6464739d03..1949074c58 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -59,10 +59,8 @@ YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \ x86/rv40dsp.o YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o -YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o -YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o -YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \ - x86/vp56dsp.o +YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o 
+YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o YASM-OBJS += x86/dsputil_yasm.o \ From 28f9ab7029bd1a02f659995919f899f84ee7361b Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 17 Jul 2012 16:47:43 +0100 Subject: [PATCH 8/9] vp3: move idct and loop filter pointers to new vp3dsp context This moves all VP3-specific function pointers from dsputil to a new vp3dsp context. There is no reason to ever use the VP3 IDCT where an MPEG2 IDCT is expected or vice versa. Signed-off-by: Mans Rullgard --- libavcodec/arm/Makefile | 1 + libavcodec/arm/dsputil_init_neon.c | 21 ---------- libavcodec/arm/vp3dsp_init_arm.c | 45 +++++++++++++++++++++ libavcodec/arm/vp3dsp_neon.S | 26 ------------ libavcodec/dsputil.c | 12 ------ libavcodec/dsputil.h | 13 ------ libavcodec/ppc/Makefile | 5 +-- libavcodec/ppc/dsputil_altivec.h | 4 -- libavcodec/ppc/dsputil_ppc.c | 6 --- libavcodec/ppc/vp3dsp_altivec.c | 40 +++++++++--------- libavcodec/vp3.c | 22 +++++----- libavcodec/vp3dsp.c | 37 ++++++++++++----- libavcodec/vp3dsp.h | 40 ++++++++++++++++++ libavcodec/vp56.c | 10 ++--- libavcodec/vp56.h | 2 + libavcodec/x86/Makefile | 1 + libavcodec/x86/dsputil_mmx.c | 35 ---------------- libavcodec/x86/vp3dsp.asm | 4 -- libavcodec/x86/vp3dsp_init.c | 65 ++++++++++++++++++++++++++++++ 19 files changed, 220 insertions(+), 169 deletions(-) create mode 100644 libavcodec/arm/vp3dsp_init_arm.c create mode 100644 libavcodec/vp3dsp.h create mode 100644 libavcodec/x86/vp3dsp_init.c diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 60cb2072ae..a8e531cf18 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -11,6 +11,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o +OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) 
+= arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 65db20d2b3..a132f6f993 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -29,11 +29,6 @@ void ff_simple_idct_neon(DCTELEM *data); void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_neon(DCTELEM *data); -void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); - void ff_clear_block_neon(DCTELEM *block); void ff_clear_blocks_neon(DCTELEM *blocks); @@ -147,9 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); -void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); -void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); - void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, @@ -186,13 +178,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->idct_add = ff_simple_idct_add_neon; c->idct = ff_simple_idct_neon; c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; - } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || - CONFIG_VP6_DECODER) && - avctx->idct_algo == FF_IDCT_VP3) { - c->idct_put = ff_vp3_idct_put_neon; - c->idct_add = ff_vp3_idct_add_neon; - c->idct = ff_vp3_idct_neon; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } } @@ -319,12 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][15] 
= ff_avg_h264_qpel8_mc33_neon; } - if (CONFIG_VP3_DECODER) { - c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon; - c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon; - c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon; - } - c->vector_fmul_window = ff_vector_fmul_window_neon; c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; c->butterflies_float = ff_butterflies_float_neon; diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c new file mode 100644 index 0000000000..ea99bfd2b3 --- /dev/null +++ b/libavcodec/arm/vp3dsp_init_arm.c @@ -0,0 +1,45 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/vp3dsp.h" + +void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); +void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); +void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); + +void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); +void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); + +av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + c->idct_put = ff_vp3_idct_put_neon; + c->idct_add = ff_vp3_idct_add_neon; + c->idct_dc_add = ff_vp3_idct_dc_add_neon; + c->v_loop_filter = ff_vp3_v_loop_filter_neon; + c->h_loop_filter = ff_vp3_h_loop_filter_neon; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +} diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S index 2a9b25f34e..e09de57281 100644 --- a/libavcodec/arm/vp3dsp_neon.S +++ b/libavcodec/arm/vp3dsp_neon.S @@ -260,32 +260,6 @@ endfunc VP3_IDCT_END row VP3_IDCT_END col -function ff_vp3_idct_neon, export=1 - mov ip, lr - mov r2, r0 - bl vp3_idct_start_neon - bl vp3_idct_end_row_neon - mov r3, #8 - bl vp3_idct_core_neon - bl vp3_idct_end_col_neon - mov lr, ip - vpop {d8-d15} - - vshr.s16 q8, q8, #4 - vshr.s16 q9, q9, #4 - vshr.s16 q10, q10, #4 - vshr.s16 q11, q11, #4 - vshr.s16 q12, q12, #4 - vst1.64 {d16-d19}, [r0,:128]! - vshr.s16 q13, q13, #4 - vshr.s16 q14, q14, #4 - vst1.64 {d20-d23}, [r0,:128]! - vshr.s16 q15, q15, #4 - vst1.64 {d24-d27}, [r0,:128]! - vst1.64 {d28-d31}, [r0,:128]! 
- bx lr -endfunc - function ff_vp3_idct_put_neon, export=1 mov ip, lr bl vp3_idct_start_neon diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 15f184e406..46232d800a 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2701,12 +2701,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_add= ff_jref_idct_add; c->idct = ff_j_rev_dct; c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && - avctx->idct_algo==FF_IDCT_VP3){ - c->idct_put= ff_vp3_idct_put_c; - c->idct_add= ff_vp3_idct_add_c; - c->idct = ff_vp3_idct_c; - c->idct_permutation_type= FF_NO_IDCT_PERM; }else if(avctx->idct_algo==FF_IDCT_WMV2){ c->idct_put= ff_wmv2_idct_put_c; c->idct_add= ff_wmv2_idct_add_c; @@ -2867,12 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h263_v_loop_filter= h263_v_loop_filter_c; } - if (CONFIG_VP3_DECODER) { - c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; - c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; - c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; - } - c->h261_loop_filter= h261_loop_filter_c; c->try_8x8basis= try_8x8basis_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 77980e02f8..c3a787c5f3 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -101,15 +101,6 @@ PUTAVG_PIXELS(10) #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c -/* VP3 DSP functions */ -void ff_vp3_idct_c(DCTELEM *block/* align 16*/); -void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); - -void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); -void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); - /* 
EA functions */ void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); @@ -391,10 +382,6 @@ typedef struct DSPContext { void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); - void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); - void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); - void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 31f4fb8ecd..e5d1d39d43 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -1,14 +1,13 @@ OBJS += ppc/dsputil_ppc.o \ +OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o + FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o ALTIVEC-OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o \ $(FFT-OBJS-yes) ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP3_DECODER) += ppc/vp3dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o ALTIVEC-OBJS += ppc/dsputil_altivec.o \ diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 63bb7602f8..7cbda36988 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -36,10 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t 
*block); -void ff_vp3_idct_altivec(DCTELEM *block); -void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); -void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); - void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index de347835e6..df70b015c8 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -185,12 +185,6 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) c->idct_put = ff_idct_put_altivec; c->idct_add = ff_idct_add_altivec; c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; - }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && - avctx->idct_algo==FF_IDCT_VP3){ - c->idct_put = ff_vp3_idct_put_altivec; - c->idct_add = ff_vp3_idct_add_altivec; - c->idct = ff_vp3_idct_altivec; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } } diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c index 938502e8bd..0c493e98cd 100644 --- a/libavcodec/ppc/vp3dsp_altivec.c +++ b/libavcodec/ppc/vp3dsp_altivec.c @@ -18,6 +18,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/vp3dsp.h" + +#if HAVE_ALTIVEC + #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" #include "libavcodec/dsputil.h" @@ -107,25 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C) #define ADD8(a) vec_add(a, eight) #define SHIFT4(a) vec_sra(a, four) -void ff_vp3_idct_altivec(DCTELEM block[64]) -{ - IDCT_START - - IDCT_1D(NOP, NOP) - TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7); - IDCT_1D(ADD8, SHIFT4) - - vec_st(b0, 0x00, block); - vec_st(b1, 0x10, block); - vec_st(b2, 0x20, block); - vec_st(b3, 0x30, block); - vec_st(b4, 0x40, block); - vec_st(b5, 
0x50, block); - vec_st(b6, 0x60, block); - vec_st(b7, 0x70, block); -} - -void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) +static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) { vec_u8 t; IDCT_START @@ -153,7 +142,7 @@ void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) PUT(b7) } -void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) +static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) { LOAD_ZERO; vec_u8 t, vdst; @@ -183,3 +172,14 @@ void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) ADD(b6) dst += stride; ADD(b7) } + +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags) +{ + if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { + c->idct_put = vp3_idct_put_altivec; + c->idct_add = vp3_idct_add_altivec; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +} diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 373f5e0b68..948167c2e8 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -40,6 +40,7 @@ #include "get_bits.h" #include "vp3data.h" +#include "vp3dsp.h" #include "xiph.h" #include "thread.h" @@ -135,6 +136,7 @@ typedef struct Vp3DecodeContext { AVFrame current_frame; int keyframe; DSPContext dsp; + VP3DSPContext vp3dsp; int flipped_image; int last_slice_end; int skip_loop_filter; @@ -1302,14 +1304,14 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye { /* do not perform left edge filter for left columns frags */ if (x > 0) { - s->dsp.vp3_h_loop_filter( + s->vp3dsp.h_loop_filter( plane_data + 8*x, stride, bounding_values); } /* do not perform top edge filter for top row fragments */ if (y > 0) { - s->dsp.vp3_v_loop_filter( + s->vp3dsp.v_loop_filter( plane_data + 8*x, stride, bounding_values); } @@ -1319,7 +1321,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye * in this frame (it will be filtered in next 
iteration) */ if ((x < width - 1) && (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { - s->dsp.vp3_h_loop_filter( + s->vp3dsp.h_loop_filter( plane_data + 8*x + 8, stride, bounding_values); } @@ -1329,7 +1331,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye * in this frame (it will be filtered in the next row) */ if ((y < height - 1) && (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { - s->dsp.vp3_v_loop_filter( + s->vp3dsp.v_loop_filter( plane_data + 8*x + 8*stride, stride, bounding_values); } @@ -1577,9 +1579,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) index = vp3_dequant(s, s->all_fragments + i, plane, 0, block); if (index > 63) continue; - if(s->avctx->idct_algo!=FF_IDCT_VP3) - block[0] += 128<<3; - s->dsp.idct_put( + s->vp3dsp.idct_put( output_plane + first_pixel, stride, block); @@ -1588,12 +1588,12 @@ static void render_slice(Vp3DecodeContext *s, int slice) if (index > 63) continue; if (index > 0) { - s->dsp.idct_add( + s->vp3dsp.idct_add( output_plane + first_pixel, stride, block); } else { - s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block); + s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block); } } } else { @@ -1676,10 +1676,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) if (avctx->pix_fmt == PIX_FMT_NONE) avctx->pix_fmt = PIX_FMT_YUV420P; avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; - if(avctx->idct_algo==FF_IDCT_AUTO) - avctx->idct_algo=FF_IDCT_VP3; ff_dsputil_init(&s->dsp, avctx); + ff_vp3dsp_init(&s->vp3dsp, avctx->flags); + ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); /* initialize to an impossible value which will force a recalculation diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index 438ae76b57..395526e040 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -24,8 +24,10 @@ * source 
code. */ +#include "libavutil/attributes.h" #include "avcodec.h" #include "dsputil.h" +#include "vp3dsp.h" #define IdctAdjustBeforeShift 8 #define xC1S7 64277 @@ -210,19 +212,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } -void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ - idct(NULL, 0, block, 0); -} - -void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ +static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ idct(dest, line_size, block, 1); } -void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ +static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ idct(dest, line_size, block, 2); } -void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ +static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, + const DCTELEM *block/*align 16*/){ int i, dc = (block[0] + 15) >> 5; for(i = 0; i < 8; i++){ @@ -238,7 +237,8 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM } } -void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) +static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, + int *bounding_values) { unsigned char *end; int filter_value; @@ -254,7 +254,8 @@ void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu } } -void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) +static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, + int *bounding_values) { unsigned char *end; int filter_value; @@ -268,3 +269,21 @@ void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); } } + +av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) +{ + c->idct_put = vp3_idct_put_c; + c->idct_add = 
vp3_idct_add_c; + c->idct_dc_add = vp3_idct_dc_add_c; + c->v_loop_filter = vp3_v_loop_filter_c; + c->h_loop_filter = vp3_h_loop_filter_c; + + c->idct_perm = FF_NO_IDCT_PERM; + + if (ARCH_ARM) + ff_vp3dsp_init_arm(c, flags); + if (ARCH_PPC) + ff_vp3dsp_init_ppc(c, flags); + if (ARCH_X86) + ff_vp3dsp_init_x86(c, flags); +} diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h new file mode 100644 index 0000000000..3781bbf3a7 --- /dev/null +++ b/libavcodec/vp3dsp.h @@ -0,0 +1,40 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP3DSP_H +#define AVCODEC_VP3DSP_H + +#include +#include "dsputil.h" + +typedef struct VP3DSPContext { + void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); + void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); + void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block); + void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); + void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); + + int idct_perm; +} VP3DSPContext; + +void ff_vp3dsp_init(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags); + +#endif /* AVCODEC_VP3DSP_H */ diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c index ed5576310b..48c4a95ba7 100644 --- a/libavcodec/vp56.c +++ b/libavcodec/vp56.c @@ -411,7 +411,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) case VP56_MB_INTRA: for (b=0; bdsp.idct_put(frame_current->data[plane] + s->block_offset[b], + s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b], s->stride[plane], s->block_coeff[b]); } break; @@ -424,7 +424,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off, frame_ref->data[plane] + off, s->stride[plane], 8); - s->dsp.idct_add(frame_current->data[plane] + off, + s->vp3dsp.idct_add(frame_current->data[plane] + off, s->stride[plane], s->block_coeff[b]); } break; @@ -442,7 +442,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) plane = ff_vp56_b2p[b+ab]; vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane], 16*col+x_off, 
16*row+y_off); - s->dsp.idct_add(frame_current->data[plane] + s->block_offset[b], + s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b], s->stride[plane], s->block_coeff[b]); } break; @@ -666,10 +666,10 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) s->avctx = avctx; avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P; - if (avctx->idct_algo == FF_IDCT_AUTO) - avctx->idct_algo = FF_IDCT_VP3; ff_dsputil_init(&s->dsp, avctx); + ff_vp3dsp_init(&s->vp3dsp, avctx->flags); ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); + ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); for (i=0; i<4; i++) diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h index 0607e0d4ce..d03b733d19 100644 --- a/libavcodec/vp56.h +++ b/libavcodec/vp56.h @@ -30,6 +30,7 @@ #include "dsputil.h" #include "get_bits.h" #include "bytestream.h" +#include "vp3dsp.h" #include "vp56dsp.h" typedef struct vp56_context VP56Context; @@ -91,6 +92,7 @@ typedef struct { struct vp56_context { AVCodecContext *avctx; DSPContext dsp; + VP3DSPContext vp3dsp; VP56DSPContext vp56dsp; ScanTable scantable; AVFrame frames[4]; diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 1949074c58..1d2635749f 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -1,5 +1,6 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o +OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o MMX-OBJS += x86/dsputil_mmx.o \ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index b695bd28ec..c1d158e166 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2476,20 +2476,6 @@ static void vector_clipf_sse(float *dst, const float *src, ); } -void ff_vp3_idct_mmx(int16_t *input_data); -void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, 
DCTELEM *block); -void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); - -void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, - const DCTELEM *block); - -void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); -void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); - -void ff_vp3_idct_sse2(int16_t *input_data); -void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); -void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); - int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order); int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, @@ -2681,14 +2667,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; } - - if (CONFIG_VP3_DECODER && HAVE_YASM) { - c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2; - c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2; - } } - if (CONFIG_VP3_DECODER && HAVE_YASM) - c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { @@ -3064,20 +3043,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) } c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; #endif - } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || - CONFIG_VP6_DECODER) && - idct_algo == FF_IDCT_VP3 && HAVE_YASM) { - if (mm_flags & AV_CPU_FLAG_SSE2) { - c->idct_put = ff_vp3_idct_put_sse2; - c->idct_add = ff_vp3_idct_add_sse2; - c->idct = ff_vp3_idct_sse2; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; - } else { - c->idct_put = ff_vp3_idct_put_mmx; - c->idct_add = ff_vp3_idct_add_mmx; - c->idct = ff_vp3_idct_mmx; - c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; - } } else if (idct_algo == FF_IDCT_CAVS) { c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } else if (idct_algo == FF_IDCT_XVIDMMX) 
{ diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index 791cc8ec35..58fa1f7b27 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -524,10 +524,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 %endmacro %macro vp3_idct_funcs 3 -cglobal vp3_idct_%1, 1, 1, %2 - VP3_IDCT_%1 r0 - RET - cglobal vp3_idct_put_%1, 3, %3, %2 VP3_IDCT_%1 r2 %if ARCH_X86_64 diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c new file mode 100644 index 0000000000..cd8e206a2c --- /dev/null +++ b/libavcodec/x86/vp3dsp_init.c @@ -0,0 +1,65 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/vp3dsp.h" +#include "config.h" + +void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); +void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); + +void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); +void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); + +void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, + const DCTELEM *block); + +void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); +void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); + +av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) +{ +#if HAVE_YASM + int cpuflags = av_get_cpu_flags(); + + if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { + c->idct_put = ff_vp3_idct_put_mmx; + c->idct_add = ff_vp3_idct_add_mmx; + c->idct_perm = FF_PARTTRANS_IDCT_PERM; + } + + if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { + c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; + + if (!(flags & CODEC_FLAG_BITEXACT)) { + c->v_loop_filter = ff_vp3_v_loop_filter_mmx2; + c->h_loop_filter = ff_vp3_h_loop_filter_mmx2; + } + } + + if (cpuflags & AV_CPU_FLAG_SSE2) { + c->idct_put = ff_vp3_idct_put_sse2; + c->idct_add = ff_vp3_idct_add_sse2; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +#endif +} From ffdd93a25e64db82c053577f415ea82c54fd5235 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 18 Jul 2012 13:22:46 +0100 Subject: [PATCH 9/9] ppc: fix build with altivec disabled Signed-off-by: Mans Rullgard --- libavcodec/ppc/vp3dsp_altivec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/ppc/vp3dsp_altivec.c 
b/libavcodec/ppc/vp3dsp_altivec.c index 0c493e98cd..75a36779ce 100644 --- a/libavcodec/ppc/vp3dsp_altivec.c +++ b/libavcodec/ppc/vp3dsp_altivec.c @@ -177,9 +177,11 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags) { - if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { +#if HAVE_ALTIVEC + if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { c->idct_put = vp3_idct_put_altivec; c->idct_add = vp3_idct_add_altivec; c->idct_perm = FF_TRANSPOSE_IDCT_PERM; } +#endif }