diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt index fee9c99..eaf2095 100644 --- a/3rdparty/libpng/CMakeLists.txt +++ b/3rdparty/libpng/CMakeLists.txt @@ -33,6 +33,11 @@ if(ENABLE_NEON) add_definitions(-DPNG_ARM_NEON_OPT=2) endif() +if(ENABLE_SSE) + list(APPEND lib_srcs contrib/intel/intel_init.c contrib/intel/filter_sse2_intrinsics.c) + add_definitions(-DPNG_INTEL_SSE) +endif() + # ---------------------------------------------------------------------------------- # Define the library target: # ---------------------------------------------------------------------------------- diff --git a/3rdparty/libpng/pngpriv.h b/3rdparty/libpng/pngpriv.h index fe3355d..c0aa785 100644 --- a/3rdparty/libpng/pngpriv.h +++ b/3rdparty/libpng/pngpriv.h @@ -182,6 +182,42 @@ # endif #endif /* PNG_ARM_NEON_OPT > 0 */ +#ifndef PNG_INTEL_SSE_OPT +# ifdef PNG_INTEL_SSE + /* Only check for SSE if the build configuration has been modified to + * enable SSE optimizations. This means that these optimizations will + * be off by default. See contrib/intel for more details. + */ +# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ + defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_OPT 1 +# endif +# endif +#endif + +#if defined(PNG_INTEL_SSE_OPT) && PNG_INTEL_SSE_OPT > 0 +# ifndef PNG_INTEL_SSE_IMPLEMENTATION +# if defined(__SSE4_1__) || defined(__AVX__) + /* We are not actually using AVX, but checking for AVX is the best + way we can detect SSE4.1 and SSSE3 on MSVC. + */ +# define PNG_INTEL_SSE_IMPLEMENTATION 3 +# elif defined(__SSSE3__) +# define PNG_INTEL_SSE_IMPLEMENTATION 2 +# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_IMPLEMENTATION 1 +# else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +# endif +# endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 +# endif +#endif + /* Is this a build of a DLL where compilation of the object modules requires * different preprocessor settings to those required for a simple library? If * so PNG_BUILD_DLL must be set. @@ -457,7 +493,7 @@ /* Memory model/platform independent fns */ #ifndef PNG_ABORT -# ifdef _WINDOWS_ +# if defined(_WINDOWS_) && !defined(WINRT) # define PNG_ABORT() ExitProcess(0) # else # define PNG_ABORT() abort() @@ -1190,6 +1226,21 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif + +#if defined(PNG_INTEL_SSE_IMPLEMENTATION) && PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, @@ -1919,6 +1970,10 @@ PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); # endif +# if defined(PNG_INTEL_SSE_IMPLEMENTATION) && PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +# endif #endif PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,